content_urls 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ #--color
data/Gemfile ADDED
@@ -0,0 +1,13 @@
1
# Fetch gems over HTTPS so packages cannot be tampered with in transit.
source "https://rubygems.org"

# Runtime dependency: used by the HTML parser.
gem "nokogiri"

# Tooling needed only to test, document and package the gem.
group :development do
  gem "rspec", "~> 2.8.0"
  gem "yard", "~> 0.7"
  gem "rdoc", "~> 3.12"
  gem "bundler"
  gem "jeweler", "~> 1.8.4"
  gem "rcov", "0.9.9"
  gem "rake", "~> 0.9.2.2"
end
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 Dennis Sutch
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,63 @@
1
+ = content_urls
2
+
3
+ Find and rewrite URLs in different types of content.
4
+
5
+ ContentUrls was developed to address two use cases:
6
+ * Find each URL in content retrieved from a website in order to spider and find all content on the website.
7
+ * Rewrite each URL in content retrieved from a website in order to make a working local copy of the website.
8
+
9
+ == Features
10
+ * Three types of content: HTML, CSS and JavaScript
11
+ * HTML content
12
+ * <a> tag href attribute
13
+ * <area> tag href attribute
14
+ * <body> tag background attribute
15
+ * <embed> tag src attribute
16
+ * <img> tag src attribute
17
+ * <link> tag href attribute
18
+ * <meta> tag content attribute containing URL
19
+ * <object> tag data attribute
20
+ * <script> tag src attribute
21
+ * style attribute of any tag (parsed as CSS content)
22
+ * body of <style> tag (parsed as CSS content)
23
+ * body of <script> tag when type or language attribute identifies JavaScript (parsed as JavaScript content)
24
+ * CSS content
25
+ * url() notation
26
+ * JavaScript content
27
+ * URI module's REGEXP
28
+
29
+ == Examples
30
+ === Find URLs in an HTML document
31
+ Provide the HTML content and the content type and obtain an array of unique URLs.
32
+ ContentUrls.urls(html, 'text/html').each do |url|
33
+ puts "Found URL: #{url}"
34
+ end
35
+
36
+ === Rewrite URLs in an HTML document
37
+ Provide the HTML content, the content type, and a block to rewrite each URL's extension.
38
+ rewritten_html = ContentUrls.rewrite_each_url(html, 'text/html') {|url| url.sub(/\.htm\z/, '.html')}
39
+
40
+ == Requirements
41
+ * nokogiri
42
+
43
+ == Development
44
+ To test and develop this gem, additional requirements are:
45
+ * bundler
46
+ * jeweler
47
+ * rake
48
+ * rcov
49
+ * rdoc
50
+ * rspec
51
+ * yard
52
+
53
+ == Contributing to content_urls
54
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
55
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
56
+ * Fork the project.
57
+ * Start a feature/bugfix branch.
58
+ * Commit and push until you are happy with your contribution.
59
+ * Make sure to add tests for it. This is important so I don't unintentionally break it in a future version.
60
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
61
+
62
+ == Copyright
63
+ Copyright (c) 2012 Dennis Sutch. See LICENSE.txt for further details.
@@ -0,0 +1,42 @@
1
# encoding: utf-8

# Load the bundle first so every task library below resolves to the
# versions pinned in the Gemfile.
require 'rubygems'
require 'bundler'
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  # Report the problem and leave with Bundler's own exit status.
  $stderr.puts bundler_error.message, "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end
require 'rake'

# Packaging and release tasks.
require 'jeweler'
Jeweler::Tasks.new do |gemspec|
  # gemspec is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gemspec.name        = "content_urls"
  gemspec.homepage    = "http://github.com/sutch/content_urls"
  gemspec.license     = "MIT"
  gemspec.summary     = "Find and rewrite URLs in different types of content."
  gemspec.description = "Parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs."
  gemspec.email       = "dennis@sutch.com"
  gemspec.authors     = ["Dennis Sutch"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

# Spec tasks: a plain run and a coverage run.
require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |t|
  t.pattern = FileList['spec/**/*_spec.rb']
end

RSpec::Core::RakeTask.new(:rcov) do |t|
  t.pattern = 'spec/**/*_spec.rb'
  t.rcov = true
end

task default: :spec

# API documentation task.
require 'yard'
YARD::Rake::YardocTask.new
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,78 @@
1
# Generated by jeweler
# DO NOT EDIT THIS FILE DIRECTLY
# Instead, edit Jeweler::Tasks in rakefile, and run 'rake gemspec'
# -*- encoding: utf-8 -*-

Gem::Specification.new do |s|
  s.name = "content_urls"
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Dennis Sutch"]
  s.date = "2012-10-03"
  s.description = "Parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs."
  s.email = "dennis@sutch.com"
  s.extra_rdoc_files = ["LICENSE.txt", "README.rdoc"]
  s.files = [
    ".document",
    ".rspec",
    "Gemfile",
    "LICENSE.txt",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "content_urls.gemspec",
    "lib/content_urls.rb",
    "lib/content_urls/parsers/css_parser.rb",
    "lib/content_urls/parsers/html_parser.rb",
    "lib/content_urls/parsers/java_script_parser.rb",
    "lib/content_urls/version.rb",
    "spec/content_urls_spec.rb",
    "spec/css_parser_spec.rb",
    "spec/html_parser_spec.rb",
    "spec/java_script_parser_spec.rb",
    "spec/spec_helper.rb"
  ]
  s.homepage = "http://github.com/sutch/content_urls"
  s.licenses = ["MIT"]
  s.require_paths = ["lib"]
  s.rubygems_version = "1.8.23"
  s.summary = "Find and rewrite URLs in different types of content."

  # Dependencies as [name, requirements] pairs, in declaration order.
  runtime_dependencies = [
    ["nokogiri", [">= 0"]]
  ]
  development_dependencies = [
    ["rspec", ["~> 2.8.0"]],
    ["yard", ["~> 0.7"]],
    ["rdoc", ["~> 3.12"]],
    ["bundler", [">= 0"]],
    ["jeweler", ["~> 1.8.4"]],
    ["rcov", ["= 0.9.9"]],
    ["rake", ["~> 0.9.2.2"]]
  ]

  versioned_spec = s.respond_to?(:specification_version)
  s.specification_version = 3 if versioned_spec

  # Only RubyGems >= 1.2.0 (with a versioned specification) distinguishes
  # runtime from development dependencies; otherwise everything is declared
  # as a plain dependency.
  if versioned_spec && Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
    runtime_dependencies.each { |name, requirements| s.add_runtime_dependency(name, requirements) }
    development_dependencies.each { |name, requirements| s.add_development_dependency(name, requirements) }
  else
    (runtime_dependencies + development_dependencies).each do |name, requirements|
      s.add_dependency(name, requirements)
    end
  end
end
78
+
@@ -0,0 +1,107 @@
1
+ require 'content_urls/version'
2
+ require 'uri'
3
+
4
# +ContentUrls+ parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs.
#
class ContentUrls

  # Mapping of content type regex to the parser classes registered for it.
  # Defined first so the registrations at the bottom of the class can use it.
  @@type_parser = Hash.new { |hash, key| hash[key] = [] }

  # Returns the URLs found in the content.
  #
  # @param [String] content the content.
  # @param [String] type the media type of the content.
  # @return [Array] the unique URLs found in the content; empty when no parser is registered for the type.
  #
  # @example Parse HTML code for URLs
  #   content = '<html><a href="index.html">Home</a></html>'
  #   ContentUrls.urls(content, 'text/html').each do |url|
  #     puts "Found URL: #{url}"
  #   end
  #   # => "Found URL: index.html"
  #
  # @example Parse content obtained from a robot
  #   response = Net::HTTP.get_response(URI('http://example.com/sample-1'))
  #   puts "URLs found at http://example.com/sample-1:"
  #   ContentUrls.urls(response.body, response.content_type).each do |url|
  #     puts "  #{url}"
  #   end
  #   # => [a list of URLs found in the content located at http://example.com/sample-1]
  #
  def self.urls(content, type)
    parser = get_parser(type)
    # Parsers expose class-level methods only; they are never instantiated.
    parser ? parser.urls(content) : []
  end

  # Rewrites each URL in the content by calling the supplied block with each URL.
  # A block result of nil leaves that URL unchanged.
  #
  # @param [String] content the content.
  # @param [String] type the media type of the content.
  # @return [String] the rewritten content, or the original content when no
  #   parser is registered for the type or the parser returns nil.
  #
  # @example Rewrite URLs in HTML code
  #   content = '<html><a href="index.htm">Home</a></html>'
  #   content = ContentUrls.rewrite_each_url(content, 'text/html') {|url| 'gone.html'}
  #   puts "Rewritten: #{content}"
  #   # => "Rewritten: <html><a href="gone.html">Home</a></html>"
  #
  def self.rewrite_each_url(content, type, &block)
    parser = get_parser(type)
    return content unless parser

    rewritten = parser.rewrite_each_url(content) do |url|
      replacement = yield url
      replacement.nil? ? url : replacement
    end
    # Parsers return the rewritten text rather than editing +content+ in place.
    rewritten.nil? ? content : rewritten
  end

  # Convert a relative URL to an absolute URL using base_url (for example, the content's original location or an HTML document's href attribute of the base tag).
  #
  # @param [String] url the URL to convert; a trailing simple anchor (#name) is removed.
  # @param [String] base_url the URL that relative references are resolved against.
  # @return [String, nil] the absolute URL, or nil when url is nil.
  #
  # @example Obtain absolute URL of "../index.html" of page obtained from "http://example.com/folder/sample.html"
  #   puts ContentUrls.to_absolute("../index.html", "http://example.com/folder/sample.html")
  #   # => "http://example.com/index.html"
  #
  def self.to_absolute(url, base_url)
    return nil if url.nil?

    # URI.encode / URI.decode were removed in Ruby 3.0; the default parser
    # provides the same escaping.
    uri_parser = URI::DEFAULT_PARSER
    without_anchor = url.to_s.gsub(/#[a-zA-Z0-9_-]*$/, '') # remove anchor
    # Unescape before escaping so already-escaped input is not escaped twice.
    normalized = uri_parser.escape(uri_parser.unescape(without_anchor))
    absolute = URI(base_url).merge(normalized)
    # Only hierarchical URIs carry a path that can be defaulted to the root.
    absolute.path = '/' if absolute.hierarchical? && absolute.path.empty?
    absolute.to_s
  end

  # Register a parser implementation class for one or more content type regular expressions.
  #
  # @param [Class] parser_class class responding to +urls+ and +rewrite_each_url+.
  # @param [Array<Regexp>] type_regexes expressions matched against the media type.
  def self.register_parser(parser_class, *type_regexes)
    type_regexes.each do |regex|
      @@type_parser[regex].push parser_class
    end
  end

  # Return parser for a file type or nil if content type not recognized.
  #
  # @param [String] type the media type of the content.
  # @return [Class, nil] the first parser registered for a matching expression.
  def self.get_parser(type)
    return nil if type.nil?

    media_type = type.to_s
    @@type_parser.each_pair do |regex, parsers|
      return parsers.first if regex =~ media_type
    end
    nil
  end

  # Parser implementations
  # - each implementation's urls method should return unique URLs

  require 'content_urls/parsers/html_parser'
  # The "+" is escaped so the expression matches the literal type "application/xhtml+xml".
  register_parser ContentUrls::HtmlParser, %r{^(text/html)\b}, %r{^(application/xhtml\+xml)\b}

  require 'content_urls/parsers/css_parser'
  register_parser ContentUrls::CssParser, %r{^(text/css)\b}

  require 'content_urls/parsers/java_script_parser'
  register_parser ContentUrls::JavaScriptParser, %r{^(application/x-javascript)\b}, %r{^(application/javascript)\b}, %r{^(text/javascript)\b}

end
@@ -0,0 +1,126 @@
1
class ContentUrls

  # +CssParser+ finds and rewrites URLs in CSS content.
  #
  # === Implementation note:
  # The methods in this class identify URLs by using regular expressions based on the W3C CSS 2.1 Specification (http://www.w3.org/TR/CSS21/syndata.html).
  class CssParser

    # Regular expression fragments based on http://www.w3.org/TR/CSS21/syndata.html
    # Only the URL itself is captured, through the named groups dq, sq and bare;
    # every other group is non-capturing.

    # {w}: [ \t\r\n\f]*
    W = '[ \t\r\n\f]*'

    # {nl}: \n|\r\n|\r|\f
    NL = '(?:\n|\r\n|\r|\f)'

    # {unicode}: \\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?
    UNICODE = '(?:\\\\[0-9a-f]{1,6}(?:\r\n|[ \n\r\t\f])?)'

    # {escape}: {unicode}|\\[^\n\r\f0-9a-f]
    ESCAPE = '(?:' + UNICODE + '|\\\\[^\n\r\f0-9a-f])'

    # {string1}: \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
    STRING1 = '"(?<dq>(?:[^\n\r\f\\\\"]|\\\\' + NL + '|' + ESCAPE + ')*)"'

    # {string2}: \'([^\n\r\f\\']|\\{nl}|{escape})*\'
    STRING2 = %q{'(?<sq>(?:[^\n\r\f\\\\']|\\\\} + NL + '|' + ESCAPE + %q{)*)'}

    # {nonascii}: any character outside the ASCII range
    NONASCII = '[^\x00-\x7F]'

    # Unquoted URL body: ([!#$%&*-\[\]-~]|{nonascii}|{escape})*
    BARE = '(?<bare>(?:[!#$%&*-\[\]-~]|' + NONASCII + '|' + ESCAPE + ')*)'

    # {uri}: url\({w}{string}{w}\)|url\({w}([!#$%&*-\[\]-~]|{nonascii}|{escape})*{w}\)
    URL_TOKEN = 'url\(' + W + '(?:' + STRING1 + '|' + STRING2 + '|' + BARE + ')' + W + '\)'

    # Matched case-insensitively so URL(...) and upper-case hex escapes are recognized.
    URL_REGEX = Regexp.new(URL_TOKEN, Regexp::IGNORECASE)

    # Names of the groups that can hold the URL, in the order they appear in URL_REGEX.
    URL_GROUPS = %w[dq sq bare].freeze

    private_constant :W, :NL, :UNICODE, :ESCAPE, :STRING1, :STRING2, :NONASCII,
                     :BARE, :URL_TOKEN, :URL_REGEX, :URL_GROUPS

    # Returns the URLs found in the CSS content.
    #
    # @param [String] content the CSS content.
    # @return [Array] the unique URLs found in the content, in order of first appearance.
    #
    # @example Parse CSS code for URLs
    #   css = 'body { background: url(/images/rainbows.jpg) }'
    #   ContentUrls::CssParser.urls(css).each do |url|
    #     puts "Found URL: #{url}"
    #   end
    #   # => "Found URL: /images/rainbows.jpg"
    def self.urls(content)
      # Exactly one of the three groups is set for each url(...) token.
      content.to_s.scan(URL_REGEX).map { |dq, sq, bare| dq || sq || bare }.uniq
    end

    # Rewrites each URL in the CSS content by calling the supplied block with each URL.
    # A block result of nil leaves that url(...) token untouched.
    #
    # @param [String] content the CSS content.
    # @return [String] the content with each URL replaced by the block's result.
    #
    # @example Rewrite URLs in CSS code
    #   css = 'body { background: url(/images/rainbows.jpg) }'
    #   css = ContentUrls::CssParser.rewrite_each_url(css) {|url| url.sub(/rainbows.jpg/, 'unicorns.jpg')}
    #   puts "Rewritten: #{css}"
    #   # => "Rewritten: body { background: url(/images/unicorns.jpg) }"
    #
    def self.rewrite_each_url(content, &block)
      content.to_s.gsub(URL_REGEX) do
        match = Regexp.last_match
        group = URL_GROUPS.find { |name| match[name] }
        url = match[group]
        replacement = yield url
        next match[0] if replacement.nil?

        # Splice by position: a textual search for the URL could hit the
        # "url(" prefix itself (e.g. url(r)) or find nothing to anchor on
        # when the URL is empty.
        token = match[0].dup
        token[match.begin(group) - match.begin(0), url.length] = replacement.to_s
        token
      end
    end

  end
end
@@ -0,0 +1,150 @@
1
+ require 'nokogiri'
2
+
3
class ContentUrls

  # +HtmlParser+ finds and rewrites URLs in HTML content.
  #
  # === Implementation note:
  # The methods in this class use Nokogiri to identify URLs. Nokogiri cleans HTML code when rewriting, so expect some changes to rewritten content.
  class HtmlParser

    # Where URLs live in an HTML document. Each entry has:
    #   :xpath     - selects the nodes to inspect
    #   :attribute - attribute holding the value (the node's text content is used when absent)
    #   :parser    - media type of the parser used when the value is embedded CSS/JavaScript
    #   :url_regex - extracts the URL from the attribute value; must define a named capture :url
    PARSER_DEFINITIONS = {
      a_href: {
        xpath: "//a[@href]",
        attribute: 'href'
      },
      area_href: {
        xpath: "//area[@href]",
        attribute: 'href'
      },
      body_background: {
        xpath: "//body[@background]",
        attribute: 'background'
      },
      embed_src: {
        xpath: "//embed[@src]",
        attribute: 'src'
      },
      img_src: {
        xpath: "//img[@src]",
        attribute: 'src'
      },
      link_href: {
        xpath: "//link[@href]",
        attribute: 'href'
      },
      meta_content: {
        xpath: "//meta[((@http-equiv='location') or (@http-equiv='refresh')) " \
               "and @content and contains(@content,';') " \
               "and number(substring-before(@content,';'))=substring-before(@content,';')]",
        attribute: 'content',
        url_regex: %r{^\d+\s*;\s*url\s*=\s*(?<quote>['"]?)(?<url>[^'"]+)\k<quote>$}i # must return named capture of :url containing URL
      },
      object_data: {
        xpath: "//object[@data]",
        attribute: 'data'
      },
      script_src: {
        xpath: "//script[@src]",
        attribute: 'src'
      },
      style_attribute: {
        xpath: "//*[@style]",
        attribute: 'style',
        parser: 'text/css'
      },
      style_tag: {
        xpath: "//style",
        parser: 'text/css'
      },
      javascript: {
        xpath: "//script[(@type='application/javascript') " \
               "or (@type='text/javascript') " \
               "or (@language='javascript')]/text()",
        parser: 'application/x-javascript'
      }
    }.freeze

    # Returns the URLs found in the HTML content.
    #
    # @param [String] content the HTML content.
    # @return [Array] the unique URLs found in the content; empty when the content is nil or cannot be parsed.
    #
    # @example Parse HTML code for URLs
    #   html = '<html><a href="index.htm">Click me</a></html>'
    #   ContentUrls::HtmlParser.urls(html).each do |url|
    #     puts "Found URL: #{url}"
    #   end
    #   # => "Found URL: index.htm"
    #
    def self.urls(content)
      found = []
      # Returning each URL unchanged leaves the parsed document unmodified.
      rewrite_each_url(content) { |url| found << url; url }
      found.uniq
    end

    # Rewrites each URL in the HTML content by calling the supplied block with each URL.
    # A block result of nil, or one equal to the URL, leaves that URL unchanged.
    #
    # @param [String] content the HTML content.
    # @return [String, nil] the rewritten document, or nil when the content is nil or cannot be parsed.
    #
    # @example Rewrite URLs in HTML code
    #   html = '<html><a href="index.htm">Click me</a></html>'
    #   html = ContentUrls::HtmlParser.rewrite_each_url(html) {|url| 'index.php'}
    #   puts "Rewritten: #{html}"
    #   # => "Rewritten: <html><a href="index.php">Click me</a></html>"
    #
    def self.rewrite_each_url(content, &block)
      doc = parse_document(content)
      return nil unless doc

      # TODO: handle href attribute of base tag
      #  - should href URL be changed?
      #  - should relative URLs be modified using base?
      #  - how should rewritten relative URLs be handled?

      PARSER_DEFINITIONS.each_value do |definition|
        doc.search(definition[:xpath]).each do |node|
          rewrite_node(node, definition, &block)
        end
      end
      doc.to_s
    end # rewrite_each

    # Parses the content with Nokogiri; returns nil for nil content or when parsing raises.
    def self.parse_document(content)
      return nil unless content

      Nokogiri::HTML(content)
    rescue StandardError
      nil
    end

    # Applies one definition to one matching node, yielding each URL it holds
    # and storing any changed value back on the node.
    def self.rewrite_node(node, definition)
      attribute = definition[:attribute]
      value = attribute ? node[attribute] : node.content # use tag attribute if provided, otherwise the node's text
      return if value.nil? || value.strip.empty?

      if definition.key?(:parser) # embedded CSS/JavaScript: delegate to that parser
        parser = ContentUrls.get_parser(definition[:parser])
        return unless parser

        rewritten = parser.rewrite_each_url(value) do |url|
          replacement = yield url
          replacement.nil? ? url : replacement.to_s
        end
        return if rewritten.nil? || rewritten == value

        # Store the rewritten code back where the value was read from.
        if attribute
          node[attribute] = rewritten
        else
          node.content = rewritten
        end

      elsif attribute # rewrite the URL within the attribute

        if definition.key?(:url_regex) # use regex to obtain URL
          match = definition[:url_regex].match(value)
          return unless match

          url = yield match[:url]
          return if url.nil? || url.to_s == match[:url] # don't change URL
          # Replace only the captured URL so the text around it (delay, "url=", quotes) is kept.
          node[attribute] = value[0...match.begin(:url)] + url.to_s + value[match.end(:url)..-1]

        else # value is the URL
          return if value =~ /^#/ # do not capture anchors within the content being parsed
          url = yield value
          return if url.nil? || url.to_s == value # don't change URL
          node.set_attribute(attribute, url.to_s)
        end
      else
        $stderr.puts "WARNING: unable to rewrite URL for #{value.to_s}"
      end
    end

    private_class_method :parse_document, :rewrite_node

  end
end
@@ -0,0 +1,64 @@
1
+ require 'uri'
2
+
3
class ContentUrls

  # +JavaScriptParser+ finds and rewrites URLs in JavaScript content.
  #
  # === Implementation note:
  # The methods in this class identify URLs by locating strings which match +URI+'s regexp.
  class JavaScriptParser

    # Returns the URLs found in the JavaScript content.
    #
    # @param [String] content the JavaScript content.
    # @return [Array] the unique URLs found in the content.
    #
    # @example Parse JavaScript code for URLs
    #   javascript = 'var link="http://example.com/"'
    #   ContentUrls::JavaScriptParser.urls(javascript).each do |url|
    #     puts "Found URL: #{url}"
    #   end
    #   # => "Found URL: http://example.com/"
    def self.urls(content)
      # The parser object replaces the obsolete URI.extract shortcut.
      URI::DEFAULT_PARSER.extract(content).uniq
    end

    # Rewrites each URL in the JavaScript content by calling the supplied block with each URL.
    # The block is called once per URL that +urls+ would report (duplicates included);
    # a block result of nil leaves that URL unchanged.
    #
    # @param [String] content the JavaScript content.
    # @return [String] the content with each URL replaced by the block's result.
    #
    # @example Rewrite URLs in JavaScript code
    #   javascript = 'var link="http://example.com/"'
    #   javascript = ContentUrls::JavaScriptParser.rewrite_each_url(javascript) {|url| url.upcase}
    #   puts "Rewritten: #{javascript}"
    #   # => "Rewritten: var link="HTTP://EXAMPLE.COM/""
    #
    def self.rewrite_each_url(content, &block)
      # gsub always resumes after the whole match, so an unchanged URL is
      # never re-scanned from its second character.
      content.gsub(URI::DEFAULT_PARSER.make_regexp) do |url|
        replacement = yield url
        replacement.nil? ? url : replacement.to_s
      end
    end

  end
end
@@ -0,0 +1,3 @@
1
class ContentUrls
  # Version of the content_urls gem; frozen so it cannot be modified at runtime.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,29 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Groups are described by name: passing the result of a method call to
# +describe+ produces a meaningless description and runs library code while
# the spec file is still loading.
describe ContentUrls do

  describe ".to_absolute" do
    it "returns nil when url is nil" do
      ContentUrls.to_absolute(nil, 'http://www.sample.com/').should eq nil
    end

    it "merges url to base_url" do
      ContentUrls.to_absolute('index.html', 'http://www.sample.com/one/two/three/').should eq 'http://www.sample.com/one/two/three/index.html'
      ContentUrls.to_absolute('/index.html', 'http://www.sample.com/one/two/three/').should eq 'http://www.sample.com/index.html'
      ContentUrls.to_absolute('/four/index.html', 'http://www.sample.com/one/two/three/').should eq 'http://www.sample.com/four/index.html'
      ContentUrls.to_absolute('../index.html', 'http://www.sample.com/one/two/three/').should eq 'http://www.sample.com/one/two/index.html'
      ContentUrls.to_absolute('../four/index.html', 'http://www.sample.com/one/two/three/').should eq 'http://www.sample.com/one/two/four/index.html'
    end
  end

  describe ".get_parser" do
    it "returns nil when content type is unknown" do
      ContentUrls.get_parser('bogus/bogus').should eq nil
    end
  end

  describe ".register_parser" do
    it "returns the class for the content type" do
      # Register inside the example so the spec does not depend on load order.
      ContentUrls.register_parser('some_parser_class', %r{^(content/test)\b})
      ContentUrls.get_parser('content/test').should eq 'some_parser_class'
    end
  end

end
@@ -0,0 +1,34 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Examples for ContentUrls::CssParser: inputs without URLs, a quoted url(),
# and the sample code from the method documentation.
describe ContentUrls::CssParser do
  let(:sample_css) { 'body { background: url(/images/rainbows.jpg) }' }

  it "should return no URLs given no content" do
    found = ContentUrls::CssParser.urls('')
    found.should eq []
  end

  it "should return no URLs given garbage content" do
    garbage = 'j;alksdjfkladsjflkajdfaksdjfsdj kladjsf lkfjalkdfj lkajdf9458094djjf'
    ContentUrls::CssParser.urls(garbage).should eq []
  end

  it "should return the URLs in the content" do
    found = ContentUrls::CssParser.urls("body {background-image:url('image.png');}")
    found.first.should eq 'image.png'
  end

  it "should execute the sample code for rewrite_each_url method" do
    rewritten = ContentUrls::CssParser.rewrite_each_url(sample_css) do |url|
      url.sub(/rainbows.jpg/, 'unicorns.jpg')
    end
    report = "Rewritten: #{rewritten}" + "\n"
    report.should eq %Q{Rewritten: body { background: url(/images/unicorns.jpg) }\n}
  end

  it "should execute sample code for urls method" do
    lines = ContentUrls::CssParser.urls(sample_css).map { |url| "Found URL: #{url}" + "\n" }
    lines.join.should eq %Q{Found URL: /images/rainbows.jpg\n}
  end
end
@@ -0,0 +1,318 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe ContentUrls::HtmlParser do
4
+ it "should return no URLs when given no content" do
5
+ ContentUrls::HtmlParser.urls('').should eq []
6
+ end
7
+ it "should return no URLs when given garbage content" do
8
+ ContentUrls::HtmlParser.urls('j;alksdjfkladsjflkajdfaksdjfsdj kladjsf lkfjalkdfj lkajdf9458094djjf').should eq []
9
+ end
10
+ end
11
+
12
+ describe ContentUrls::HtmlParser do
13
+ it "should return the URLs in the content" do
14
+ ContentUrls::HtmlParser.urls("<a href='index.html").first.should eq 'index.html'
15
+ end
16
+ end
17
+
18
+ describe ContentUrls::HtmlParser do
19
+ it "should parse HTML Sample 1 and return all a links" do
20
+
21
+ html_sample_1 =<<SAMPLE_1
22
+ <html>
23
+ <head>
24
+ <title>HTML Sample 1</title>
25
+ </head>
26
+ <body>
27
+ <h1>HTML Sample 1</h1>
28
+ <a href="a-href-link-1.html"></a>
29
+ <a href="http://www.example.com/1/2/3/a-href-link-2.html"></a>
30
+ <a href="/folder/a-href-link-3.html?a=1"></a>
31
+ </body>
32
+ </html>
33
+ SAMPLE_1
34
+
35
+ urls = ContentUrls::HtmlParser.urls(html_sample_1)
36
+ urls.include?('a-href-link-1.html').should eq true
37
+ urls.include?('http://www.example.com/1/2/3/a-href-link-2.html').should eq true
38
+ urls.include?('/folder/a-href-link-3.html?a=1').should eq true
39
+ end
40
+ end
41
+
42
+ describe ContentUrls::HtmlParser do
43
+ it "should parse HTML Sample 1 and rewrite all a links" do
44
+
45
+ html_sample_1 =<<SAMPLE_1
46
+ <html>
47
+ <head>
48
+ <title>HTML Sample 1</title>
49
+ </head>
50
+ <body>
51
+ <h1>HTML Sample 1</h1>
52
+ <a href="a-href-link-1.html"></a>
53
+ <a href="http://www.example.com/1/2/3/a-href-link-2.html"></a>
54
+ <a href="/folder/a-href-link-3.html?a=1"></a>
55
+ </body>
56
+ </html>
57
+ SAMPLE_1
58
+
59
+ content = ContentUrls::HtmlParser.rewrite_each_url(html_sample_1) do |url|
60
+ url = URI.parse url
61
+ url.path = url.path.sub(/\.html\b/, '.php')
62
+ url
63
+ end
64
+ urls = ContentUrls::HtmlParser.urls(content)
65
+ urls.include?('a-href-link-1.php').should eq true
66
+ urls.include?('http://www.example.com/1/2/3/a-href-link-2.php').should eq true
67
+ urls.include?('/folder/a-href-link-3.php?a=1').should eq true
68
+ end
69
+ end
70
+
71
+ describe ContentUrls::HtmlParser do
72
+ it "should parse HTML Sample 2 and return all 'area href' URLs" do
73
+
74
+ html_sample_2 =<<SAMPLE_2
75
+ <html>
76
+ <head>
77
+ <title>HTML Sample 2</title>
78
+ </head>
79
+ <body>
80
+ <h1>HTML Sample 2</h1>
81
+ <img src="sample.gif" width="200" height="200" alt="Click somewhere" usemap="#sample-map">
82
+ <map name="sample-map">
83
+ <area shape="rect" coords="0,0,100,100" href="area-href-link-1.html" alt="link 1">
84
+ <area shape="circle" coords="150,150,2" href="http://www.example.com/1/2/3/area-href-link-2.html" alt="link 2">
85
+ <area shape="circle" coords="100,180,1" href="/folder/area-href-link-3.html?a=1" alt="link 3">
86
+ </map>
87
+ </body>
88
+ </html>
89
+ SAMPLE_2
90
+
91
+ urls = ContentUrls::HtmlParser.urls(html_sample_2)
92
+ urls.include?('area-href-link-1.html').should eq true
93
+ urls.include?('http://www.example.com/1/2/3/area-href-link-2.html').should eq true
94
+ urls.include?('/folder/area-href-link-3.html?a=1').should eq true
95
+ end
96
+ end
97
+
98
+ describe ContentUrls::HtmlParser do
99
+ it "should parse HTML Sample 3 and return 'body background' URL" do
100
+
101
+ html_sample_3 =<<SAMPLE_3
102
+ <html>
103
+ <head>
104
+ <title>HTML Sample 3</title>
105
+ </head>
106
+ <body background="/images/background.png">
107
+ <h1>HTML Sample 3</h1>
108
+ </body>
109
+ </html>
110
+ SAMPLE_3
111
+
112
+ urls = ContentUrls::HtmlParser.urls(html_sample_3)
113
+ urls.first.should eq '/images/background.png'
114
+ end
115
+ end
116
+
117
+ describe ContentUrls::HtmlParser do
118
+ it "should parse HTML Sample 4 and return 'embed src' URL" do
119
+
120
+ html_sample_4 =<<SAMPLE_4
121
+ <html>
122
+ <head>
123
+ <title>HTML Sample 4</title>
124
+ </head>
125
+ <body>
126
+ <h1>HTML Sample 4</h1>
127
+ <embed src="sample.swf" />
128
+ </body>
129
+ </html>
130
+ SAMPLE_4
131
+
132
+ urls = ContentUrls::HtmlParser.urls(html_sample_4)
133
+ urls.first.should eq 'sample.swf'
134
+ end
135
+ end
136
+
137
+ describe ContentUrls::HtmlParser do
138
+ it "should parse HTML Sample 5 and return 'img src' URL" do
139
+
140
+ html_sample_5 =<<SAMPLE_5
141
+ <html>
142
+ <head>
143
+ <title>HTML Sample 5</title>
144
+ </head>
145
+ <body>
146
+ <h1>HTML Sample 5</h1>
147
+ <img src="sample.gif">
148
+ </body>
149
+ </html>
150
+ SAMPLE_5
151
+
152
+ urls = ContentUrls::HtmlParser.urls(html_sample_5)
153
+ urls.first.should eq 'sample.gif'
154
+ end
155
+ end
156
+
157
+ describe ContentUrls::HtmlParser do
158
+ it "should parse HTML Sample 6 and return 'link href' URL" do
159
+
160
+ html_sample_6 =<<SAMPLE_6
161
+ <html>
162
+ <head>
163
+ <title>HTML Sample 6</title>
164
+ <link href="/index.php" REL="index">
165
+ </head>
166
+ <body>
167
+ <h1>HTML Sample 6</h1>
168
+ </body>
169
+ </html>
170
+ SAMPLE_6
171
+
172
+ urls = ContentUrls::HtmlParser.urls(html_sample_6)
173
+ urls.first.should eq '/index.php'
174
+ end
175
+ end
176
+
177
+ describe ContentUrls::HtmlParser do
178
+ it "should parse HTML Sample 7 and return 'object data' URL" do
179
+
180
+ html_sample_7 =<<SAMPLE_7
181
+ <html>
182
+ <head>
183
+ <title>HTML Sample 7</title>
184
+ </head>
185
+ <body>
186
+ <h1>HTML Sample 7</h1>
187
+ <object width="400" height="400" data="/stuff/example.swf"></object>
188
+ </body>
189
+ </html>
190
+ SAMPLE_7
191
+
192
+ urls = ContentUrls::HtmlParser.urls(html_sample_7)
193
+ urls.first.should eq '/stuff/example.swf'
194
+ end
195
+ end
196
+
197
+ describe ContentUrls::HtmlParser do
198
+ it "should parse HTML Sample 8 and return 'script src' URL" do
199
+
200
+ html_sample_8 =<<SAMPLE_8
201
+ <html>
202
+ <head>
203
+ <title>HTML Sample 8</title>
204
+ </head>
205
+ <body>
206
+ <h1>HTML Sample 8</h1>
207
+ <script language="javascript" src="../scripts/go.js"></script>
208
+ </body>
209
+ </html>
210
+ SAMPLE_8
211
+
212
+ urls = ContentUrls::HtmlParser.urls(html_sample_8)
213
+ urls.first.should eq '../scripts/go.js'
214
+ end
215
+ end
216
+
217
+ describe ContentUrls::HtmlParser do
218
+ it "should parse HTML Sample 9 and return 'meta content' URL" do
219
+
220
+ html_sample_9 =<<SAMPLE_9
221
+ <html>
222
+ <head>
223
+ <title>HTML Sample 9</title>
224
+ <meta http-equiv="refresh" content="5;URL='http://example.com/'">
225
+ </head>
226
+ <body>
227
+ <h1>HTML Sample 9</h1>
228
+ </body>
229
+ </html>
230
+ SAMPLE_9
231
+
232
+ urls = ContentUrls::HtmlParser.urls(html_sample_9)
233
+ urls.first.should eq 'http://example.com/'
234
+ end
235
+ end
236
+
237
+ describe ContentUrls::HtmlParser do
238
+ it "should parse HTML Sample 10 and return URLs found within 'style' attributes" do
239
+
240
+ html_sample_10 =<<SAMPLE_10
241
+ <html>
242
+ <head>
243
+ <title>HTML Sample 10</title>
244
+ </head>
245
+ <body style="background-image:url('background.jpg');">
246
+ <h1>HTML Sample 10</h1>
247
+ </body>
248
+ </html>
249
+ SAMPLE_10
250
+
251
+ urls = ContentUrls::HtmlParser.urls(html_sample_10)
252
+ urls.first.should eq 'background.jpg'
253
+ end
254
+ end
255
+
256
+ describe ContentUrls::HtmlParser do
257
+ it "should parse HTML Sample 11 and return URLs found within 'style' tags" do
258
+
259
+ html_sample_11 =<<SAMPLE_11
260
+ <html>
261
+ <head>
262
+ <title>HTML Sample 11</title>
263
+ <style type="text/css">
264
+ body {background-image:url('/image/background.jpg');}
265
+ </style>
266
+ </head>
267
+ <body>
268
+ <h1>HTML Sample 11</h1>
269
+ </body>
270
+ </html>
271
+ SAMPLE_11
272
+
273
+ urls = ContentUrls::HtmlParser.urls(html_sample_11)
274
+ urls.first.should eq '/image/background.jpg'
275
+ end
276
+ end
277
+
278
+ describe ContentUrls::HtmlParser do
279
+ it "should parse HTML Sample 12 and return URLs found within 'script' tags" do
280
+
281
+ html_sample_12 =<<SAMPLE_12
282
+ <html>
283
+ <head>
284
+ <title>HTML Sample 12</title>
285
+ <script type="text/javascript">
286
+ var link="http://www.sample.com/index.html"
287
+ // ...
288
+ </script>
289
+ </head>
290
+ <body>
291
+ <h1>HTML Sample 12</h1>
292
+ </body>
293
+ </html>
294
+ SAMPLE_12
295
+
296
+ urls = ContentUrls::HtmlParser.urls(html_sample_12)
297
+ urls.first.should eq 'http://www.sample.com/index.html'
298
+ end
299
+ end
300
+
301
+ describe ContentUrls::HtmlParser do
302
+ it "should execute the sample code for rewrite_each_url method" do
303
+ #output = ''
304
+ html = '<html><a href="index.htm">Click me</a></html>'
305
+ html = ContentUrls::HtmlParser.rewrite_each_url(html) {|url| 'index.php'}
306
+ #output += "Rewritten: #{html}" + "\n"
307
+ #output.should eq %Q{Rewritten: <html><a href="index.php">Click me</a></html>\n}
308
+ ContentUrls::HtmlParser.urls(html).first.should eq 'index.php' # Nokogiri rewrites HTML, instead check rewritten URL
309
+ end
310
+ it "should execute sample code for urls method" do
311
+ output = ''
312
+ html = '<html><a href="index.htm">Click me</a></html>'
313
+ ContentUrls::HtmlParser.urls(html).each do |url|
314
+ output += "Found URL: #{url}" + "\n"
315
+ end
316
+ output.should eq %Q{Found URL: index.htm\n}
317
+ end
318
+ end
@@ -0,0 +1,31 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe ContentUrls::JavaScriptParser do
4
+ it "should return no URLs given no content" do
5
+ ContentUrls::JavaScriptParser.urls('').should eq []
6
+ end
7
+ end
8
+
9
+ describe ContentUrls::JavaScriptParser do
10
+ it "should return the URLs in the content" do
11
+ ContentUrls::JavaScriptParser.urls('var link="http://www.sample.com/index.html"').first.should eq 'http://www.sample.com/index.html'
12
+ end
13
+ end
14
+
15
+ describe ContentUrls::JavaScriptParser do
16
+ it "should execute the sample code for rewrite_each_url method" do
17
+ output = ''
18
+ javascript = 'var link="http://example.com/"'
19
+ javascript = ContentUrls::JavaScriptParser.rewrite_each_url(javascript) {|url| url.upcase}
20
+ output += "Rewritten: #{javascript}" + "\n"
21
+ output.should eq %Q{Rewritten: var link="HTTP://EXAMPLE.COM/"\n}
22
+ end
23
+ it "should execute sample code for urls method" do
24
+ output = ''
25
+ javascript = 'var link="http://example.com/"'
26
+ ContentUrls::JavaScriptParser.urls(javascript).each do |url|
27
+ output += "Found URL: #{url}" + "\n"
28
+ end
29
+ output.should eq %Q{Found URL: http://example.com/\n}
30
+ end
31
+ end
@@ -0,0 +1,12 @@
1
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
2
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
3
+ require 'rspec'
4
+ require 'content_urls'
5
+
6
+ # Requires supporting files with custom matchers and macros, etc,
7
+ # in ./support/ and its subdirectories.
8
+ Dir["#{File.dirname(__FILE__)}/support/**/*.rb"].each {|f| require f}
9
+
10
+ RSpec.configure do |config|
11
+
12
+ end
metadata ADDED
@@ -0,0 +1,195 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: content_urls
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Dennis Sutch
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-03 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rspec
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: 2.8.0
38
+ type: :development
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: 2.8.0
46
+ - !ruby/object:Gem::Dependency
47
+ name: yard
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ~>
52
+ - !ruby/object:Gem::Version
53
+ version: '0.7'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ~>
60
+ - !ruby/object:Gem::Version
61
+ version: '0.7'
62
+ - !ruby/object:Gem::Dependency
63
+ name: rdoc
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ~>
68
+ - !ruby/object:Gem::Version
69
+ version: '3.12'
70
+ type: :development
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: '3.12'
78
+ - !ruby/object:Gem::Dependency
79
+ name: bundler
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
86
+ type: :development
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ - !ruby/object:Gem::Dependency
95
+ name: jeweler
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ~>
100
+ - !ruby/object:Gem::Version
101
+ version: 1.8.4
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ~>
108
+ - !ruby/object:Gem::Version
109
+ version: 1.8.4
110
+ - !ruby/object:Gem::Dependency
111
+ name: rcov
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - '='
116
+ - !ruby/object:Gem::Version
117
+ version: 0.9.9
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - '='
124
+ - !ruby/object:Gem::Version
125
+ version: 0.9.9
126
+ - !ruby/object:Gem::Dependency
127
+ name: rake
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ~>
132
+ - !ruby/object:Gem::Version
133
+ version: 0.9.2.2
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ~>
140
+ - !ruby/object:Gem::Version
141
+ version: 0.9.2.2
142
+ description: Parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides
143
+ methods for iterating through URLs and changing URLs.
144
+ email: dennis@sutch.com
145
+ executables: []
146
+ extensions: []
147
+ extra_rdoc_files:
148
+ - LICENSE.txt
149
+ - README.rdoc
150
+ files:
151
+ - .document
152
+ - .rspec
153
+ - Gemfile
154
+ - LICENSE.txt
155
+ - README.rdoc
156
+ - Rakefile
157
+ - VERSION
158
+ - content_urls.gemspec
159
+ - lib/content_urls.rb
160
+ - lib/content_urls/parsers/css_parser.rb
161
+ - lib/content_urls/parsers/html_parser.rb
162
+ - lib/content_urls/parsers/java_script_parser.rb
163
+ - lib/content_urls/version.rb
164
+ - spec/content_urls_spec.rb
165
+ - spec/css_parser_spec.rb
166
+ - spec/html_parser_spec.rb
167
+ - spec/java_script_parser_spec.rb
168
+ - spec/spec_helper.rb
169
+ homepage: http://github.com/sutch/content_urls
170
+ licenses:
171
+ - MIT
172
+ post_install_message:
173
+ rdoc_options: []
174
+ require_paths:
175
+ - lib
176
+ required_ruby_version: !ruby/object:Gem::Requirement
177
+ none: false
178
+ requirements:
179
+ - - ! '>='
180
+ - !ruby/object:Gem::Version
181
+ version: '0'
182
+ required_rubygems_version: !ruby/object:Gem::Requirement
183
+ none: false
184
+ requirements:
185
+ - - ! '>='
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ requirements: []
189
+ rubyforge_project:
190
+ rubygems_version: 1.8.24
191
+ signing_key:
192
+ specification_version: 3
193
+ summary: Find and rewrite URLs in different types of content.
194
+ test_files: []
195
+ has_rdoc: