plumnailer 0.0.4 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.4
1
+ 0.1.0
@@ -10,7 +10,7 @@ module Plumnailer
10
10
 
11
11
  def initialize(cache_dir)
12
12
  @cache_dir = cache_dir
13
- FileUtils.mkdir_p(cache_dir)
13
+ FileUtils.mkdir_p cache_dir
14
14
  end
15
15
 
16
16
  # Fetch the contents of a url and cache result on filesystem.
@@ -6,7 +6,7 @@ module Plumnailer
6
6
  def initialize
7
7
  @fetcher = Plumnailer::Fetcher.new
8
8
  @doc_parser = Plumnailer::DocParser.new
9
- @img_url_filters = [Plumnailer::ImgHostnameFilter.new]
9
+ @img_url_filters = [Plumnailer::ImgUrlFilter.new]
10
10
  @img_parser = Plumnailer::ImgParser.new(fetcher)
11
11
  @img_comparator = Plumnailer::ImgComparator
12
12
  end
@@ -28,7 +28,7 @@ module Plumnailer
28
28
  imgs.each do |img|
29
29
  # set source document on image so it can be used in comparator
30
30
  img.doc = doc
31
- img.extend(@img_comparator)
31
+ img.extend @img_comparator
32
32
  end
33
33
  imgs.sort.first
34
34
  end
@@ -5,6 +5,12 @@ module Plumnailer
5
5
  # Nokogiri::HTML::Document mixin.
6
6
  module Doc
7
7
 
8
+ # Get the href attribute of the base tag from the head of the document.
9
+ def doc_base_href
10
+ base = at('//head/base')
11
+ base['href'] if base
12
+ end
13
+
8
14
  # Return a list of the src attributes of all img tags.
9
15
  def img_srcs
10
16
  search('//img').map { |x| x['src'] }.compact
@@ -20,7 +26,12 @@ module Plumnailer
20
26
  rescue URI::InvalidURIError
21
27
  next
22
28
  end
23
- result.push(u.is_a?(URI::HTTP) ? u : URI.join(base_url || source_url, i))
29
+
30
+ result << if u.is_a?(URI::HTTP)
31
+ u
32
+ else
33
+ URI.join(base_url || doc_base_href || source_url, i)
34
+ end
24
35
  end
25
36
 
26
37
  result
@@ -14,7 +14,7 @@ module Plumnailer
14
14
 
15
15
  # Parse image data from one or more urls.
16
16
  def parse(img_urls)
17
- if img_urls.respond_to? :inject
17
+ if img_urls.respond_to?(:inject)
18
18
  cache = {}
19
19
  img_urls.inject([]) do |memo,u|
20
20
  # nil values should be cached
@@ -29,7 +29,7 @@ module Plumnailer
29
29
  # additional fields.
30
30
  def parse_one(img_url)
31
31
  img_data = fetcher.fetch(img_url)
32
- unless !img_data or img_data.empty?
32
+ unless not img_data or img_data.empty?
33
33
  img = Magick::ImageList.new.from_blob(img_data).extend(
34
34
  Plumnailer::WebImage)
35
35
  img.source_url = img_url
@@ -0,0 +1,23 @@
1
+ module Plumnailer
2
+
3
+ # Decide whether to process images based on their url.
4
+ class ImgUrlFilter
5
+
6
+ # Return true if this image url should not be considered.
7
+ def reject?(img_url)
8
+ ImgUrlPatterns.each do |re|
9
+ return true if img_url and img_url.to_s[re]
10
+ end
11
+ false
12
+ end
13
+
14
+ ImgUrlPatterns = [
15
+ %r{^http://ad\.doubleclick\.net/},
16
+ %r{^http://b\.scorecardresearch\.com/},
17
+ %r{^http://pixel\.quantserve\.com/},
18
+ %r{^http://s7\.addthis\.com/},
19
+ ]
20
+
21
+ end
22
+
23
+ end
data/lib/plumnailer.rb CHANGED
@@ -3,8 +3,8 @@ require 'plumnailer/doc_parser'
3
3
  require 'plumnailer/doc'
4
4
  require 'plumnailer/fetcher'
5
5
  require 'plumnailer/img_comparator'
6
- require 'plumnailer/img_hostname_filter'
7
6
  require 'plumnailer/img_parser'
7
+ require 'plumnailer/img_url_filter'
8
8
  require 'plumnailer/web_image'
9
9
 
10
10
  require 'plumnailer/caching_fetcher'
data/plumnailer.gemspec CHANGED
@@ -1,15 +1,15 @@
1
1
  # Generated by jeweler
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{plumnailer}
8
- s.version = "0.0.4"
8
+ s.version = "0.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Matthew M. Boedicker"]
12
- s.date = %q{2010-11-11}
12
+ s.date = %q{2010-11-21}
13
13
  s.description = %q{Choose the most representative image on an HTML page for use as a thumbnail}
14
14
  s.email = %q{matthewm@boedicker.org}
15
15
  s.extra_rdoc_files = [
@@ -17,24 +17,23 @@ Gem::Specification.new do |s|
17
17
  ]
18
18
  s.files = [
19
19
  "COPYING",
20
- "README.textile",
21
- "Rakefile",
22
- "VERSION",
23
- "lib/plumnailer.rb",
24
- "lib/plumnailer/caching_fetcher.rb",
25
- "lib/plumnailer/chooser.rb",
26
- "lib/plumnailer/doc.rb",
27
- "lib/plumnailer/doc_parser.rb",
28
- "lib/plumnailer/fetcher.rb",
29
- "lib/plumnailer/img_comparator.rb",
30
- "lib/plumnailer/img_hostname_filter.rb",
31
- "lib/plumnailer/img_parser.rb",
32
- "lib/plumnailer/web_image.rb",
33
- "plumnailer.gemspec",
34
- "test.rb"
20
+ "README.textile",
21
+ "Rakefile",
22
+ "VERSION",
23
+ "lib/plumnailer.rb",
24
+ "lib/plumnailer/caching_fetcher.rb",
25
+ "lib/plumnailer/chooser.rb",
26
+ "lib/plumnailer/doc.rb",
27
+ "lib/plumnailer/doc_parser.rb",
28
+ "lib/plumnailer/fetcher.rb",
29
+ "lib/plumnailer/img_comparator.rb",
30
+ "lib/plumnailer/img_parser.rb",
31
+ "lib/plumnailer/img_url_filter.rb",
32
+ "lib/plumnailer/web_image.rb",
33
+ "plumnailer.gemspec",
34
+ "test.rb"
35
35
  ]
36
36
  s.homepage = %q{http://github.com/mmb/plumnailer}
37
- s.rdoc_options = ["--charset=UTF-8"]
38
37
  s.require_paths = ["lib"]
39
38
  s.rubygems_version = %q{1.3.7}
40
39
  s.summary = %q{Choose the most representative image on an HTML page}
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: plumnailer
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 27
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
+ - 1
8
9
  - 0
9
- - 4
10
- version: 0.0.4
10
+ version: 0.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Matthew M. Boedicker
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-11-11 00:00:00 -05:00
18
+ date: 2010-11-21 00:00:00 -05:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -71,8 +71,8 @@ files:
71
71
  - lib/plumnailer/doc_parser.rb
72
72
  - lib/plumnailer/fetcher.rb
73
73
  - lib/plumnailer/img_comparator.rb
74
- - lib/plumnailer/img_hostname_filter.rb
75
74
  - lib/plumnailer/img_parser.rb
75
+ - lib/plumnailer/img_url_filter.rb
76
76
  - lib/plumnailer/web_image.rb
77
77
  - plumnailer.gemspec
78
78
  - test.rb
@@ -81,8 +81,8 @@ homepage: http://github.com/mmb/plumnailer
81
81
  licenses: []
82
82
 
83
83
  post_install_message:
84
- rdoc_options:
85
- - --charset=UTF-8
84
+ rdoc_options: []
85
+
86
86
  require_paths:
87
87
  - lib
88
88
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -1,22 +0,0 @@
1
- module Plumnailer
2
-
3
- # Decide whether to process images based on their url hostname.
4
- class ImgHostnameFilter
5
-
6
- # Return true if this image url should not be considered.
7
- def reject?(img_url)
8
- HostnameRejectPatterns.each do |re|
9
- return true if img_url.host and img_url.host[re]
10
- end
11
- false
12
- end
13
-
14
- HostnameRejectPatterns = [
15
- %r{^ad\.doubleclick\.net$},
16
- %r{^b\.scorecardresearch\.com$},
17
- %r{^pixel\.quantserve\.com$},
18
- ]
19
-
20
- end
21
-
22
- end