content_urls 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ ODJhMDkzODQ3NDAzMWI5MGMzOWYzZDEzZDNkNTE4YTVlZjFmMjVmNA==
5
+ data.tar.gz: !binary |-
6
+ YWE0MGZkNmRkMGE1YWU4ODk5ZWI5YzQ2OGFkMmM5YjU4ZWRlY2ZlMw==
7
+ !binary "U0hBNTEy":
8
+ metadata.gz: !binary |-
9
+ MDJiOTE3YjlmMTI4NzY2Y2Q3ODI3NTNjYzMyMzRiYTkwNDJjODJjMTgwZjFi
10
+ NGFmYmUxY2NiZDVjOTM4ZjU3NDQ0MmFlNjk5ODVmNmJjZmJhMzQ3ZmMzZWQz
11
+ MzM1NDlhOGM2NDkwMTI0ZjEwYmE1ZGFkOTVlMmIyNzA4ODI0YjI=
12
+ data.tar.gz: !binary |-
13
+ OTEyZDY5ZjBjODVmODgwMDU4NzkwYzMyOGM5MmI2OTBiMGJkYjQwYTU2Y2M5
14
+ NDgxY2IzM2M1YzkxNjEwNzdlNDgyNTU1Y2U0YmYzZTUzY2U0NzAzNmZkMWI0
15
+ OWU5ZDU0NWU3MDU3YmFkNTBhNDFmZWJjM2Q2Y2NhMWE5YmU4MDQ=
data/.document CHANGED
@@ -1,5 +1,5 @@
1
- lib/**/*.rb
2
- bin/*
3
- -
4
- features/**/*.feature
5
- LICENSE.txt
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec CHANGED
@@ -1 +1 @@
1
- #--color
1
+ #--color
data/Gemfile CHANGED
@@ -1,13 +1,13 @@
1
- source "http://rubygems.org"
2
-
3
- gem "nokogiri"
4
-
5
- group :development do
6
- gem "rspec", "~> 2.8.0"
7
- gem "yard", "~> 0.7"
8
- gem "rdoc", "~> 3.12"
9
- gem "bundler"
10
- gem "jeweler", "~> 1.8.4"
11
- gem "rcov", "0.9.9"
12
- gem "rake", "~> 0.9.2.2"
13
- end
1
+ source "http://rubygems.org"
2
+
3
+ gem "nokogiri"
4
+
5
+ group :development do
6
+ gem "rspec", "~> 2.8.0"
7
+ gem "yard", "~> 0.7"
8
+ gem "rdoc", "~> 3.12"
9
+ gem "bundler"
10
+ gem "jeweler", "~> 1.8.4"
11
+ gem "rcov", "0.9.9"
12
+ gem "rake", "~> 0.9.2.2"
13
+ end
@@ -1,20 +1,20 @@
1
- Copyright (c) 2012 Dennis Sutch
2
-
3
- Permission is hereby granted, free of charge, to any person obtaining
4
- a copy of this software and associated documentation files (the
5
- "Software"), to deal in the Software without restriction, including
6
- without limitation the rights to use, copy, modify, merge, publish,
7
- distribute, sublicense, and/or sell copies of the Software, and to
8
- permit persons to whom the Software is furnished to do so, subject to
9
- the following conditions:
10
-
11
- The above copyright notice and this permission notice shall be
12
- included in all copies or substantial portions of the Software.
13
-
14
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
- LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
- OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
- WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
1
+ Copyright (c) 2012 Dennis Sutch
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -1,63 +1,73 @@
1
- = content_urls
2
-
3
- Find and rewrite URLs in different types of content.
4
-
5
- ContentUrls was developed to address two use cases:
6
- * Find each URL in content retrieved from a website in order to spider and find all content on the website.
7
- * Rewrite each URL in content retrieved from a website in order to make a working local copy of the website.
8
-
9
- == Features
10
- * Three types of content: HTML, CSS and JavaScript
11
- * HTML content
12
- * <a> tag href attribute
13
- * <area> tag href attribute
14
- * <body> tag background attribute
15
- * <embed> tag src attribute
16
- * <img> tag src attribute
17
- * <link> tag href attribute
18
- * <meta> tag content attribute containing URL
19
- * <object> tag data attribute
20
- * <script> tag src attribute
21
- * style attribute of any tag (parsed as CSS content)
22
- * body of <style> tag (parsed as CSS content)
23
- * body of <script> tag when type or language attribute identifies JavaScript (parsed as JavaScript content)
24
- * CSS content
25
- * url() notation
26
- * JavaScript content
27
- * URI module's REGEXP
28
-
29
- == Examples
30
- === Find URLs in an HTML document
31
- Provide the HTML content and the content type and obtain an array of unique URLs.
32
- ContentUrls.urls(html, 'text/html').each do |url|
33
- puts "Found URL: #{url}"
34
- end
35
-
36
- === Rewrite URLs in an HTML document
37
- Provide the HTML content, the content type, and a block to rewrite each URL's extension.
38
- rewritten_html = ContentUrls.rewrite_each_url(html, 'text/html') {|url| url.sub(/.htm/, '.html'}
39
-
40
- == Requirements
41
- * nokogiri
42
-
43
- == Development
44
- To test and develop this gem, additional requirements are:
45
- * bundler
46
- * jeweler
47
- * rake
48
- * rcov
49
- * rdoc
50
- * rspec
51
- * yard"
52
-
53
- == Contributing to content_urls
54
- * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
55
- * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
56
- * Fork the project.
57
- * Start a feature/bugfix branch.
58
- * Commit and push until you are happy with your contribution.
59
- * Make sure to add tests for it. This is important so I don't unintentionally break it in a future version.
60
- * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
61
-
62
- == Copyright
63
- Copyright (c) 2012 Dennis Sutch. See LICENSE.txt for further details.
1
+ = ContentUrls
2
+
3
+ Find and rewrite URLs in different types of content.
4
+
5
+ ContentUrls was developed to address two use cases:
6
+ * Find each URL in content retrieved from a website in order to spider and find all content on the website.
7
+ * Rewrite each URL in content retrieved from a website in order to make a working local copy of the website.
8
+
9
+ == Features
10
+ * Three types of content: HTML, CSS and JavaScript
11
+ * HTML content
12
+ * <a> tag href attribute
13
+ * <area> tag href attribute
14
+ * <body> tag background attribute
15
+ * <embed> tag src attribute
16
+ * <frame> tag src attribute
17
+ * <iframe> tag src attribute
18
+ * <img> tag src attribute
19
+ * <link> tag href attribute
20
+ * <meta> tag content attribute containing URL
21
+ * <object> tag data attribute
22
+ * <script> tag src attribute
23
+ * style attribute of any tag (parsed as CSS content)
24
+ * body of <style> tag (parsed as CSS content)
25
+ * body of <script> tag when type or language attribute identifies JavaScript (parsed as JavaScript content)
26
+ * CSS content
27
+ * url() notation
28
+ * JavaScript content
29
+ * URI module's REGEXP
30
+
31
+ == Examples
32
+ === Find URLs in an HTML document
33
+ Provide the HTML content and the content type and obtain an array of unique URLs.
34
+ ContentUrls.urls(html, 'text/html').each do |url|
35
+ puts "Found URL: #{url}"
36
+ end
37
+
38
+ === Rewrite URLs in an HTML document
39
+ Provide the HTML content, the content type, and a block to rewrite each URL's extension.
40
+ rewritten_html = ContentUrls.rewrite_each_url(html, 'text/html') {|url| url.sub(/\.htm\z/, '.html')}
41
+
42
+ == Requirements
43
+ * nokogiri
44
+
45
+ == Development
46
+ To test and develop this gem, additional requirements are:
47
+ * bundler
48
+ * jeweler
49
+ * rake
50
+ * rcov
51
+ * rdoc
52
+ * rspec
53
+ * yard
54
+
55
+ == Goals for ContentUrls
56
+ * Include support for:
57
+ * Acrobat (.pdf)
58
+ * Flash (.swf)
59
+ * Microsoft Office (.doc, .xls, .ppt)
60
+ * text (regular expression for URLs)
61
+ * Capture links retrieved from a headless web browser which executes the code (JavaScript, etc.)
62
+
63
+ == Contributing to content_urls
64
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
65
+ * Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
66
+ * Fork the project.
67
+ * Start a feature/bugfix branch.
68
+ * Commit and push until you are happy with your contribution.
69
+ * Make sure to add tests for it. This is important so I don't unintentionally break it in a future version.
70
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
71
+
72
+ == Copyright
73
+ Copyright (c) 2012 Dennis Sutch. See LICENSE.txt for further details.
data/Rakefile CHANGED
@@ -1,42 +1,42 @@
1
- # encoding: utf-8
2
-
3
- require 'rubygems'
4
- require 'bundler'
5
- begin
6
- Bundler.setup(:default, :development)
7
- rescue Bundler::BundlerError => e
8
- $stderr.puts e.message
9
- $stderr.puts "Run `bundle install` to install missing gems"
10
- exit e.status_code
11
- end
12
- require 'rake'
13
-
14
- require 'jeweler'
15
- Jeweler::Tasks.new do |gem|
16
- # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
- gem.name = "content_urls"
18
- gem.homepage = "http://github.com/sutch/content_urls"
19
- gem.license = "MIT"
20
- gem.summary = %Q{Find and rewrite URLs in different types of content.}
21
- gem.description = %Q{Parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs.}
22
- gem.email = "dennis@sutch.com"
23
- gem.authors = ["Dennis Sutch"]
24
- # dependencies defined in Gemfile
25
- end
26
- Jeweler::RubygemsDotOrgTasks.new
27
-
28
- require 'rspec/core'
29
- require 'rspec/core/rake_task'
30
- RSpec::Core::RakeTask.new(:spec) do |spec|
31
- spec.pattern = FileList['spec/**/*_spec.rb']
32
- end
33
-
34
- RSpec::Core::RakeTask.new(:rcov) do |spec|
35
- spec.pattern = 'spec/**/*_spec.rb'
36
- spec.rcov = true
37
- end
38
-
39
- task :default => :spec
40
-
41
- require 'yard'
42
- YARD::Rake::YardocTask.new
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "content_urls"
18
+ gem.homepage = "http://github.com/sutch/content_urls"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{Find and rewrite URLs in different types of content.}
21
+ gem.description = %Q{Parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs.}
22
+ gem.email = "dennis@sutch.com"
23
+ gem.authors = ["Dennis Sutch"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rspec/core'
29
+ require 'rspec/core/rake_task'
30
+ RSpec::Core::RakeTask.new(:spec) do |spec|
31
+ spec.pattern = FileList['spec/**/*_spec.rb']
32
+ end
33
+
34
+ RSpec::Core::RakeTask.new(:rcov) do |spec|
35
+ spec.pattern = 'spec/**/*_spec.rb'
36
+ spec.rcov = true
37
+ end
38
+
39
+ task :default => :spec
40
+
41
+ require 'yard'
42
+ YARD::Rake::YardocTask.new
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.0
1
+ 0.1.1
Binary file
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "content_urls"
8
- s.version = "0.1.0"
8
+ s.version = "0.1.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Dennis Sutch"]
12
- s.date = "2012-10-03"
12
+ s.date = "2013-07-10"
13
13
  s.description = "Parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs."
14
14
  s.email = "dennis@sutch.com"
15
15
  s.extra_rdoc_files = [
@@ -24,6 +24,7 @@ Gem::Specification.new do |s|
24
24
  "README.rdoc",
25
25
  "Rakefile",
26
26
  "VERSION",
27
+ "content_urls-0.1.0.gem",
27
28
  "content_urls.gemspec",
28
29
  "lib/content_urls.rb",
29
30
  "lib/content_urls/parsers/css_parser.rb",
@@ -39,11 +40,11 @@ Gem::Specification.new do |s|
39
40
  s.homepage = "http://github.com/sutch/content_urls"
40
41
  s.licenses = ["MIT"]
41
42
  s.require_paths = ["lib"]
42
- s.rubygems_version = "1.8.23"
43
+ s.rubygems_version = "2.0.3"
43
44
  s.summary = "Find and rewrite URLs in different types of content."
44
45
 
45
46
  if s.respond_to? :specification_version then
46
- s.specification_version = 3
47
+ s.specification_version = 4
47
48
 
48
49
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
49
50
  s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
@@ -1,107 +1,107 @@
1
- require 'content_urls/version'
2
- require 'uri'
3
-
4
- # +ContentUrls+ parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs.
5
- #
6
- class ContentUrls
7
-
8
- # Returns the URLs found in the content.
9
- #
10
- # @param [String] content the content.
11
- # @param [String] type the media type of the content.
12
- # @return [Array] the unique URLs found in the content.
13
- #
14
- # @example Parse HTML code for URLs
15
- # content = '<html><a href="index.html">Home</a></html>'
16
- # ContentUrls.urls(content, 'text/html').each do |url|
17
- # puts "Found URL: #{url}"
18
- # end
19
- # # => "Found URL: index.html"
20
- #
21
- # @example Parse content obtained from a robot
22
- # response = Net::HTTP.get_response(URI('http://example.com/sample-1'))
23
- # puts "URLs found at http://example.com/sample-1:"
24
- # ContentUrls.urls(response.body, response.content_type).each do |url|
25
- # puts " #{url}"
26
- # end
27
- # # => [a list of URLs found in the content located at http://example.com/sample-1]
28
- #
29
- def self.urls(content, type)
30
- urls = []
31
- if (parser = get_parser(type))
32
- parser.new(content).urls.each { |url| urls << url }
33
- end
34
- urls
35
- end
36
-
37
- # Rewrites each URL in the content by calling the supplied block with each URL.
38
- #
39
- # @param [String] content the HTML content.
40
- # @param [String] type the media type of the content.
41
- # @returns [string] content the rewritten content.
42
- #
43
- # @example Rewrite URLs in HTML code
44
- # content = '<html><a href="index.htm">Home</a></html>'
45
- # content = ContentUrls.rewrite_each_url(content, 'text/html') {|url| 'gone.html'}
46
- # puts "Rewritten: #{content}"
47
- # # => "Rewritten: <html><a href="gone.html">Home</a></html>"
48
- #
49
- def self.rewrite_each_url(content, type, &block)
50
- if (parser = get_parser(type))
51
- parser.rewrite_each_url(content) do |url|
52
- replacement = yield url
53
- (replacement.nil? ? url : replacement)
54
- end
55
- end
56
- content
57
- end
58
-
59
- # Convert a relative URL to an absolute URL using base_url (for example, the content's original location or an HTML document's href attribute of the base tag).
60
- #
61
- # @example Obtain absolute URL of "../index.html" of page obtained from "http://example.com/one/two/sample.html"
62
- # puts ContentUrls.to_absolute("../index.html", "http://example.com/folder/sample.html")
63
- # # => "http://example.com/index.html"
64
- #
65
- def self.to_absolute(url, base_url)
66
- return nil if url.nil?
67
-
68
- url = URI.encode(URI.decode(url.to_s.gsub(/#[a-zA-Z0-9_-]*$/,''))) # remove anchor
69
- absolute = URI(base_url).merge(url)
70
- absolute.path = '/' if absolute.path.empty?
71
- absolute.to_s
72
- end
73
-
74
- protected
75
-
76
- @@type_parser = Hash.new { |hash, key| hash[key] = [] } # mapping of type regex to parser class
77
-
78
- # Register a parser implementation class for one or more content type regular expressions
79
- def self.register_parser(parser_class, *type_regexes)
80
- type_regexes.each do |regex|
81
- @@type_parser[regex].push parser_class
82
- end
83
- end
84
-
85
- # Return parser for a file type or nil if content type not recognized
86
- def self.get_parser(type)
87
- @@type_parser.each_pair do |regex, parser|
88
- if type =~ regex
89
- return parser.first
90
- end
91
- end
92
- return nil
93
- end
94
-
95
- # Parser implementations
96
- # - each implementation's urls method should return unique URLs
97
-
98
- require 'content_urls/parsers/html_parser'
99
- register_parser ContentUrls::HtmlParser, %r{^(text/html)\b}, %r{^(application/xhtml+xml)\b}
100
-
101
- require 'content_urls/parsers/css_parser'
102
- register_parser ContentUrls::CssParser, %r{^(text/css)\b}
103
-
104
- require 'content_urls/parsers/java_script_parser'
105
- register_parser ContentUrls::JavaScriptParser, %r{^(application/x-javascript)\b}, %r{^(application/javascript)\b}, %r{^(text/javascript)\b}
106
-
107
- end
1
+ require 'content_urls/version'
2
+ require 'uri'
3
+
4
+ # +ContentUrls+ parses various file types (HTML, CSS, JavaScript, ...) for URLs and provides methods for iterating through URLs and changing URLs.
5
+ #
6
+ class ContentUrls
7
+
8
+ # Returns the URLs found in the content.
9
+ #
10
+ # @param [String] content the content.
11
+ # @param [String] type the media type of the content.
12
+ # @return [Array] the unique URLs found in the content.
13
+ #
14
+ # @example Parse HTML code for URLs
15
+ # content = '<html><a href="index.html">Home</a></html>'
16
+ # ContentUrls.urls(content, 'text/html').each do |url|
17
+ # puts "Found URL: #{url}"
18
+ # end
19
+ # # => "Found URL: index.html"
20
+ #
21
+ # @example Parse content obtained from a robot
22
+ # response = Net::HTTP.get_response(URI('http://example.com/sample-1'))
23
+ # puts "URLs found at http://example.com/sample-1:"
24
+ # ContentUrls.urls(response.body, response.content_type).each do |url|
25
+ # puts " #{url}"
26
+ # end
27
+ # # => [a list of URLs found in the content located at http://example.com/sample-1]
28
+ #
29
+ def self.urls(content, type)
30
+ urls = []
31
+ if (parser = get_parser(type))
32
+ parser.new(content).urls.each { |url| urls << url }
33
+ end
34
+ urls
35
+ end
36
+
37
+ # Rewrites each URL in the content by calling the supplied block with each URL.
38
+ #
39
+ # @param [String] content the HTML content.
40
+ # @param [String] type the media type of the content.
41
+ # @return [String] the rewritten content.
42
+ #
43
+ # @example Rewrite URLs in HTML code
44
+ # content = '<html><a href="index.htm">Home</a></html>'
45
+ # content = ContentUrls.rewrite_each_url(content, 'text/html') {|url| 'gone.html'}
46
+ # puts "Rewritten: #{content}"
47
+ # # => "Rewritten: <html><a href="gone.html">Home</a></html>"
48
+ #
49
+ def self.rewrite_each_url(content, type, &block)
50
+ if (parser = get_parser(type))
51
+ parser.rewrite_each_url(content) do |url|
52
+ replacement = yield url
53
+ (replacement.nil? ? url : replacement)
54
+ end
55
+ end
56
+ content
57
+ end
58
+
59
+ # Convert a relative URL to an absolute URL using base_url (for example, the content's original location or an HTML document's href attribute of the base tag).
60
+ #
61
+ # @example Obtain absolute URL of "../index.html" of page obtained from "http://example.com/folder/sample.html"
62
+ # puts ContentUrls.to_absolute("../index.html", "http://example.com/folder/sample.html")
63
+ # # => "http://example.com/index.html"
64
+ #
65
+ def self.to_absolute(url, base_url)
66
+ return nil if url.nil?
67
+
68
+ url = URI.encode(URI.decode(url.to_s.gsub(/#[a-zA-Z0-9_-]*$/,''))) # remove anchor
69
+ absolute = URI(base_url).merge(url)
70
+ absolute.path = '/' if absolute.path.empty?
71
+ absolute.to_s
72
+ end
73
+
74
+ protected
75
+
76
+ @@type_parser = Hash.new { |hash, key| hash[key] = [] } # mapping of type regex to parser class
77
+
78
+ # Register a parser implementation class for one or more content type regular expressions
79
+ def self.register_parser(parser_class, *type_regexes)
80
+ type_regexes.each do |regex|
81
+ @@type_parser[regex].push parser_class
82
+ end
83
+ end
84
+
85
+ # Return parser for a file type or nil if content type not recognized
86
+ def self.get_parser(type)
87
+ @@type_parser.each_pair do |regex, parser|
88
+ if type =~ regex
89
+ return parser.first
90
+ end
91
+ end
92
+ return nil
93
+ end
94
+
95
+ # Parser implementations
96
+ # - each implementation's urls method should return unique URLs
97
+
98
+ require 'content_urls/parsers/html_parser'
99
+ register_parser ContentUrls::HtmlParser, %r{^(text/html)\b}, %r{^(application/xhtml+xml)\b}
100
+
101
+ require 'content_urls/parsers/css_parser'
102
+ register_parser ContentUrls::CssParser, %r{^(text/css)\b}
103
+
104
+ require 'content_urls/parsers/java_script_parser'
105
+ register_parser ContentUrls::JavaScriptParser, %r{^(application/x-javascript)\b}, %r{^(application/javascript)\b}, %r{^(text/javascript)\b}
106
+
107
+ end