plumnailer 0.0.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.4
1
+ 0.1.0
@@ -10,7 +10,7 @@ module Plumnailer
10
10
 
11
11
  def initialize(cache_dir)
12
12
  @cache_dir = cache_dir
13
- FileUtils.mkdir_p(cache_dir)
13
+ FileUtils.mkdir_p cache_dir
14
14
  end
15
15
 
16
16
  # Fetch the contents of a url and cache result on filesystem.
@@ -6,7 +6,7 @@ module Plumnailer
6
6
  def initialize
7
7
  @fetcher = Plumnailer::Fetcher.new
8
8
  @doc_parser = Plumnailer::DocParser.new
9
- @img_url_filters = [Plumnailer::ImgHostnameFilter.new]
9
+ @img_url_filters = [Plumnailer::ImgUrlFilter.new]
10
10
  @img_parser = Plumnailer::ImgParser.new(fetcher)
11
11
  @img_comparator = Plumnailer::ImgComparator
12
12
  end
@@ -28,7 +28,7 @@ module Plumnailer
28
28
  imgs.each do |img|
29
29
  # set source document on image so it can be used in comparator
30
30
  img.doc = doc
31
- img.extend(@img_comparator)
31
+ img.extend @img_comparator
32
32
  end
33
33
  imgs.sort.first
34
34
  end
@@ -5,6 +5,12 @@ module Plumnailer
5
5
  # Nokogiri::HTML:Document mixin.
6
6
  module Doc
7
7
 
8
+ # Get the href attribute of the base tag from the head of the document.
9
+ def doc_base_href
10
+ base = at('//head/base')
11
+ base['href'] if base
12
+ end
13
+
8
14
  # Return a list of the src attributes of all img tags.
9
15
  def img_srcs
10
16
  search('//img').map { |x| x['src'] }.compact
@@ -20,7 +26,12 @@ module Plumnailer
20
26
  rescue URI::InvalidURIError
21
27
  next
22
28
  end
23
- result.push(u.is_a?(URI::HTTP) ? u : URI.join(base_url || source_url, i))
29
+
30
+ result << if u.is_a?(URI::HTTP)
31
+ u
32
+ else
33
+ URI.join(base_url || doc_base_href || source_url, i)
34
+ end
24
35
  end
25
36
 
26
37
  result
@@ -14,7 +14,7 @@ module Plumnailer
14
14
 
15
15
  # Parse image data from one or more urls.
16
16
  def parse(img_urls)
17
- if img_urls.respond_to? :inject
17
+ if img_urls.respond_to?(:inject)
18
18
  cache = {}
19
19
  img_urls.inject([]) do |memo,u|
20
20
  # nil values should be cached
@@ -29,7 +29,7 @@ module Plumnailer
29
29
  # additional fields.
30
30
  def parse_one(img_url)
31
31
  img_data = fetcher.fetch(img_url)
32
- unless !img_data or img_data.empty?
32
+ unless not img_data or img_data.empty?
33
33
  img = Magick::ImageList.new.from_blob(img_data).extend(
34
34
  Plumnailer::WebImage)
35
35
  img.source_url = img_url
@@ -0,0 +1,23 @@
1
+ module Plumnailer
2
+
3
+ # Decide whether to process images based on their url.
4
+ class ImgUrlFilter
5
+
6
+ # Return true if this image url should not be considered.
7
+ def reject?(img_url)
8
+ ImgUrlPatterns.each do |re|
9
+ return true if img_url and img_url.to_s[re]
10
+ end
11
+ false
12
+ end
13
+
14
+ ImgUrlPatterns = [
15
+ %r{^http://ad\.doubleclick\.net/},
16
+ %r{^http://b\.scorecardresearch\.com/},
17
+ %r{^http://pixel\.quantserve\.com/},
18
+ %r{^http://s7\.addthis\.com/},
19
+ ]
20
+
21
+ end
22
+
23
+ end
data/lib/plumnailer.rb CHANGED
@@ -3,8 +3,8 @@ require 'plumnailer/doc_parser'
3
3
  require 'plumnailer/doc'
4
4
  require 'plumnailer/fetcher'
5
5
  require 'plumnailer/img_comparator'
6
- require 'plumnailer/img_hostname_filter'
7
6
  require 'plumnailer/img_parser'
7
+ require 'plumnailer/img_url_filter'
8
8
  require 'plumnailer/web_image'
9
9
 
10
10
  require 'plumnailer/caching_fetcher'
data/plumnailer.gemspec CHANGED
@@ -1,15 +1,15 @@
1
1
  # Generated by jeweler
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{plumnailer}
8
- s.version = "0.0.4"
8
+ s.version = "0.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Matthew M. Boedicker"]
12
- s.date = %q{2010-11-11}
12
+ s.date = %q{2010-11-21}
13
13
  s.description = %q{Choose the most representative image on an HTML page for use as a thumbnail}
14
14
  s.email = %q{matthewm@boedicker.org}
15
15
  s.extra_rdoc_files = [
@@ -17,24 +17,23 @@ Gem::Specification.new do |s|
17
17
  ]
18
18
  s.files = [
19
19
  "COPYING",
20
- "README.textile",
21
- "Rakefile",
22
- "VERSION",
23
- "lib/plumnailer.rb",
24
- "lib/plumnailer/caching_fetcher.rb",
25
- "lib/plumnailer/chooser.rb",
26
- "lib/plumnailer/doc.rb",
27
- "lib/plumnailer/doc_parser.rb",
28
- "lib/plumnailer/fetcher.rb",
29
- "lib/plumnailer/img_comparator.rb",
30
- "lib/plumnailer/img_hostname_filter.rb",
31
- "lib/plumnailer/img_parser.rb",
32
- "lib/plumnailer/web_image.rb",
33
- "plumnailer.gemspec",
34
- "test.rb"
20
+ "README.textile",
21
+ "Rakefile",
22
+ "VERSION",
23
+ "lib/plumnailer.rb",
24
+ "lib/plumnailer/caching_fetcher.rb",
25
+ "lib/plumnailer/chooser.rb",
26
+ "lib/plumnailer/doc.rb",
27
+ "lib/plumnailer/doc_parser.rb",
28
+ "lib/plumnailer/fetcher.rb",
29
+ "lib/plumnailer/img_comparator.rb",
30
+ "lib/plumnailer/img_parser.rb",
31
+ "lib/plumnailer/img_url_filter.rb",
32
+ "lib/plumnailer/web_image.rb",
33
+ "plumnailer.gemspec",
34
+ "test.rb"
35
35
  ]
36
36
  s.homepage = %q{http://github.com/mmb/plumnailer}
37
- s.rdoc_options = ["--charset=UTF-8"]
38
37
  s.require_paths = ["lib"]
39
38
  s.rubygems_version = %q{1.3.7}
40
39
  s.summary = %q{Choose the most representative image on an HTML page}
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: plumnailer
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 27
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
+ - 1
8
9
  - 0
9
- - 4
10
- version: 0.0.4
10
+ version: 0.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Matthew M. Boedicker
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-11-11 00:00:00 -05:00
18
+ date: 2010-11-21 00:00:00 -05:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -71,8 +71,8 @@ files:
71
71
  - lib/plumnailer/doc_parser.rb
72
72
  - lib/plumnailer/fetcher.rb
73
73
  - lib/plumnailer/img_comparator.rb
74
- - lib/plumnailer/img_hostname_filter.rb
75
74
  - lib/plumnailer/img_parser.rb
75
+ - lib/plumnailer/img_url_filter.rb
76
76
  - lib/plumnailer/web_image.rb
77
77
  - plumnailer.gemspec
78
78
  - test.rb
@@ -81,8 +81,8 @@ homepage: http://github.com/mmb/plumnailer
81
81
  licenses: []
82
82
 
83
83
  post_install_message:
84
- rdoc_options:
85
- - --charset=UTF-8
84
+ rdoc_options: []
85
+
86
86
  require_paths:
87
87
  - lib
88
88
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -1,22 +0,0 @@
1
- module Plumnailer
2
-
3
- # Decide whether to process images based on their url hostname.
4
- class ImgHostnameFilter
5
-
6
- # Return true if this image url should not be considered.
7
- def reject?(img_url)
8
- HostnameRejectPatterns.each do |re|
9
- return true if img_url.host and img_url.host[re]
10
- end
11
- false
12
- end
13
-
14
- HostnameRejectPatterns = [
15
- %r{^ad\.doubleclick\.net$},
16
- %r{^b\.scorecardresearch\.com$},
17
- %r{^pixel\.quantserve\.com$},
18
- ]
19
-
20
- end
21
-
22
- end