mechanize_content 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -1,4 +1,7 @@
1
1
  source "http://rubygems.org"
2
2
 
3
- # Specify your gem's dependencies in mechanize_content.gemspec
4
3
  gemspec
4
+
5
+ group :development do
6
+ gem "rake"
7
+ end
@@ -4,35 +4,35 @@ module MechanizeContent
4
4
  MIN_HEIGHT = 64
5
5
  AD_WIDTH = 728
6
6
  AD_HEIGHT = 90
7
-
7
+
8
8
  def self.best_image(images, base_url)
9
9
  imgs = images.map{|i| Image.new(i, base_url)}
10
10
  top_image = imgs.select{|i| i.interesting_css?}.first || imgs.select{|i| i.interesting_file?}.first
11
11
  top_image.absolute_url if top_image
12
12
  end
13
-
13
+
14
14
  def initialize(image, base_url)
15
- @src = image["src"]
15
+ @src = URI.escape(image["src"])
16
16
  @width = image["width"].to_i
17
17
  @height = image["height"].to_i
18
18
  @base_url = base_url
19
19
  end
20
-
20
+
21
21
  def interesting_css?
22
22
  valid_image?(@width, @height)
23
23
  end
24
-
24
+
25
25
  def interesting_file?
26
26
  open(absolute_url, "rb") do |fh|
27
27
  is = ImageSize.new(fh.read)
28
28
  return valid_image?(is.width, is.height)
29
29
  end
30
30
  end
31
-
31
+
32
32
  def valid_image?(width, height)
33
33
  big_enough?(width, height) && not_advertising?(width, height) && allows_hotlinking?
34
34
  end
35
-
35
+
36
36
  def allows_hotlinking?
37
37
  begin
38
38
  open(absolute_url, "Referer" => "http://splitstate.com")
@@ -41,21 +41,21 @@ module MechanizeContent
41
41
  end
42
42
  true
43
43
  end
44
-
44
+
45
45
  def advertising?(width, height)
46
46
  @src.include?("banner") || @src.include?(".gif") || ((width == AD_WIDTH) && (height == AD_HEIGHT))
47
47
  end
48
-
48
+
49
49
  def not_advertising?(width, height)
50
50
  !advertising?(width, height)
51
51
  end
52
-
52
+
53
53
  def big_enough?(width, height)
54
54
  width > MIN_WIDTH && height > MIN_HEIGHT
55
55
  end
56
-
56
+
57
57
  def absolute_url
58
58
  URI.parse(@src).relative? ? (URI.parse(@base_url.to_s)+@src).to_s : @src
59
- end
59
+ end
60
60
  end
61
61
  end
@@ -1,3 +1,3 @@
1
1
  module MechanizeContent
2
- VERSION = "0.3.1"
2
+ VERSION = "0.3.2"
3
3
  end
@@ -2,21 +2,31 @@ require 'spec_helper'
2
2
 
3
3
  describe MechanizeContent::Image do
4
4
  use_vcr_cassette :record => :new_episodes
5
-
5
+
6
+ describe "#absolute_url" do
7
+ context "given a uri with a space" do
8
+ it "will be escaped" do
9
+ img = {"src" => "http://media.giantbomb.com/uploads/0/26/10180-psn icon_middle.jpg", "width" => 280, "height" => 283}
10
+ image = MechanizeContent::Image.new(img, "http://www.giantbomb.com/news/the-slow-motion-ballet-of-death-in-max-payne-3/3721/")
11
+ image.absolute_url.should eq("http://media.giantbomb.com/uploads/0/26/10180-psn%20icon_middle.jpg")
12
+ end
13
+ end
14
+ end
15
+
6
16
  describe "#interesting_css?" do
7
17
  context "given a gif" do
8
18
  it "is not interesting" do
9
19
  img = {"src" => "http://www.cmpevents.com/GD10/ablank.gif2", "width" => 500, "height" => 500}
10
20
  image = MechanizeContent::Image.new(img, "https://www.cmpevents.com")
11
- image.should_not be_interesting_css
21
+ image.should_not be_interesting_css
12
22
  end
13
23
  end
14
-
24
+
15
25
  context "given a banner" do
16
26
  it "is not interesting" do
17
27
  img = {"src" => "http://www.cmpevents.com/GD10/banner.png", "width" => 500, "height" => 500}
18
28
  image = MechanizeContent::Image.new(img, "https://www.cmpevents.com")
19
- image.should_not be_interesting_css
29
+ image.should_not be_interesting_css
20
30
  end
21
31
  end
22
32
 
@@ -35,5 +45,5 @@ describe MechanizeContent::Image do
35
45
  image.should be_interesting_css
36
46
  end
37
47
  end
38
- end
48
+ end
39
49
  end
metadata CHANGED
@@ -1,82 +1,79 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: mechanize_content
3
- version: !ruby/object:Gem::Version
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.2
4
5
  prerelease:
5
- version: 0.3.1
6
6
  platform: ruby
7
- authors:
7
+ authors:
8
8
  - John Griffin
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
-
13
- date: 2011-07-10 00:00:00 Z
14
- dependencies:
15
- - !ruby/object:Gem::Dependency
12
+ date: 2011-10-09 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
16
15
  name: mechanize
17
- prerelease: false
18
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &70365206410400 !ruby/object:Gem::Requirement
19
17
  none: false
20
- requirements:
18
+ requirements:
21
19
  - - ~>
22
- - !ruby/object:Gem::Version
20
+ - !ruby/object:Gem::Version
23
21
  version: 2.0.1
24
22
  type: :runtime
25
- version_requirements: *id001
26
- - !ruby/object:Gem::Dependency
27
- name: imagesize
28
23
  prerelease: false
29
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: *70365206410400
25
+ - !ruby/object:Gem::Dependency
26
+ name: imagesize
27
+ requirement: &70365206409620 !ruby/object:Gem::Requirement
30
28
  none: false
31
- requirements:
29
+ requirements:
32
30
  - - ~>
33
- - !ruby/object:Gem::Version
31
+ - !ruby/object:Gem::Version
34
32
  version: 0.1.1
35
33
  type: :runtime
36
- version_requirements: *id002
37
- - !ruby/object:Gem::Dependency
38
- name: rspec
39
34
  prerelease: false
40
- requirement: &id003 !ruby/object:Gem::Requirement
35
+ version_requirements: *70365206409620
36
+ - !ruby/object:Gem::Dependency
37
+ name: rspec
38
+ requirement: &70365206408940 !ruby/object:Gem::Requirement
41
39
  none: false
42
- requirements:
40
+ requirements:
43
41
  - - ~>
44
- - !ruby/object:Gem::Version
42
+ - !ruby/object:Gem::Version
45
43
  version: 2.6.0
46
44
  type: :development
47
- version_requirements: *id003
48
- - !ruby/object:Gem::Dependency
49
- name: vcr
50
45
  prerelease: false
51
- requirement: &id004 !ruby/object:Gem::Requirement
46
+ version_requirements: *70365206408940
47
+ - !ruby/object:Gem::Dependency
48
+ name: vcr
49
+ requirement: &70365206408160 !ruby/object:Gem::Requirement
52
50
  none: false
53
- requirements:
51
+ requirements:
54
52
  - - ~>
55
- - !ruby/object:Gem::Version
53
+ - !ruby/object:Gem::Version
56
54
  version: 1.10.0
57
55
  type: :development
58
- version_requirements: *id004
59
- - !ruby/object:Gem::Dependency
60
- name: fakeweb
61
56
  prerelease: false
62
- requirement: &id005 !ruby/object:Gem::Requirement
57
+ version_requirements: *70365206408160
58
+ - !ruby/object:Gem::Dependency
59
+ name: fakeweb
60
+ requirement: &70365206407500 !ruby/object:Gem::Requirement
63
61
  none: false
64
- requirements:
62
+ requirements:
65
63
  - - ~>
66
- - !ruby/object:Gem::Version
64
+ - !ruby/object:Gem::Version
67
65
  version: 1.3.0
68
66
  type: :development
69
- version_requirements: *id005
70
- description: pass in a url or urls and mechanize-content will select the best block of text, image and title by analysing the page content
71
- email:
67
+ prerelease: false
68
+ version_requirements: *70365206407500
69
+ description: pass in a url or urls and mechanize-content will select the best block
70
+ of text, image and title by analysing the page content
71
+ email:
72
72
  - johnog@gmail.com
73
73
  executables: []
74
-
75
74
  extensions: []
76
-
77
75
  extra_rdoc_files: []
78
-
79
- files:
76
+ files:
80
77
  - .gitignore
81
78
  - .rspec
82
79
  - .rvmrc
@@ -98,32 +95,35 @@ files:
98
95
  - spec/spec_helper.rb
99
96
  homepage: http://github.com/john-griffin/mechanize-content
100
97
  licenses: []
101
-
102
98
  post_install_message:
103
99
  rdoc_options: []
104
-
105
- require_paths:
100
+ require_paths:
106
101
  - lib
107
- required_ruby_version: !ruby/object:Gem::Requirement
102
+ required_ruby_version: !ruby/object:Gem::Requirement
108
103
  none: false
109
- requirements:
110
- - - ">="
111
- - !ruby/object:Gem::Version
112
- version: "0"
113
- required_rubygems_version: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ! '>='
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ segments:
109
+ - 0
110
+ hash: -160408397260513251
111
+ required_rubygems_version: !ruby/object:Gem::Requirement
114
112
  none: false
115
- requirements:
116
- - - ">="
117
- - !ruby/object:Gem::Version
118
- version: "0"
113
+ requirements:
114
+ - - ! '>='
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ segments:
118
+ - 0
119
+ hash: -160408397260513251
119
120
  requirements: []
120
-
121
121
  rubyforge_project: mechanize_content
122
- rubygems_version: 1.8.5
122
+ rubygems_version: 1.8.7
123
123
  signing_key:
124
124
  specification_version: 3
125
125
  summary: scrape the best content from a page
126
- test_files:
126
+ test_files:
127
127
  - spec/cassettes/MechanizeContent.yml
128
128
  - spec/cassettes/MechanizeContent_Image.yml
129
129
  - spec/mechanize_content/image_spec.rb