mechanize_content 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -1,4 +1,7 @@
1
1
  source "http://rubygems.org"
2
2
 
3
- # Specify your gem's dependencies in mechanize_content.gemspec
4
3
  gemspec
4
+
5
+ group :development do
6
+ gem "rake"
7
+ end
@@ -4,35 +4,35 @@ module MechanizeContent
4
4
  MIN_HEIGHT = 64
5
5
  AD_WIDTH = 728
6
6
  AD_HEIGHT = 90
7
-
7
+
8
8
  def self.best_image(images, base_url)
9
9
  imgs = images.map{|i| Image.new(i, base_url)}
10
10
  top_image = imgs.select{|i| i.interesting_css?}.first || imgs.select{|i| i.interesting_file?}.first
11
11
  top_image.absolute_url if top_image
12
12
  end
13
-
13
+
14
14
  def initialize(image, base_url)
15
- @src = image["src"]
15
+ @src = URI.escape(image["src"])
16
16
  @width = image["width"].to_i
17
17
  @height = image["height"].to_i
18
18
  @base_url = base_url
19
19
  end
20
-
20
+
21
21
  def interesting_css?
22
22
  valid_image?(@width, @height)
23
23
  end
24
-
24
+
25
25
  def interesting_file?
26
26
  open(absolute_url, "rb") do |fh|
27
27
  is = ImageSize.new(fh.read)
28
28
  return valid_image?(is.width, is.height)
29
29
  end
30
30
  end
31
-
31
+
32
32
  def valid_image?(width, height)
33
33
  big_enough?(width, height) && not_advertising?(width, height) && allows_hotlinking?
34
34
  end
35
-
35
+
36
36
  def allows_hotlinking?
37
37
  begin
38
38
  open(absolute_url, "Referer" => "http://splitstate.com")
@@ -41,21 +41,21 @@ module MechanizeContent
41
41
  end
42
42
  true
43
43
  end
44
-
44
+
45
45
  def advertising?(width, height)
46
46
  @src.include?("banner") || @src.include?(".gif") || ((width == AD_WIDTH) && (height == AD_HEIGHT))
47
47
  end
48
-
48
+
49
49
  def not_advertising?(width, height)
50
50
  !advertising?(width, height)
51
51
  end
52
-
52
+
53
53
  def big_enough?(width, height)
54
54
  width > MIN_WIDTH && height > MIN_HEIGHT
55
55
  end
56
-
56
+
57
57
  def absolute_url
58
58
  URI.parse(@src).relative? ? (URI.parse(@base_url.to_s)+@src).to_s : @src
59
- end
59
+ end
60
60
  end
61
61
  end
@@ -1,3 +1,3 @@
1
1
  module MechanizeContent
2
- VERSION = "0.3.1"
2
+ VERSION = "0.3.2"
3
3
  end
@@ -2,21 +2,31 @@ require 'spec_helper'
2
2
 
3
3
  describe MechanizeContent::Image do
4
4
  use_vcr_cassette :record => :new_episodes
5
-
5
+
6
+ describe "#absolute_url" do
7
+ context "given a uri with a space" do
8
+ it "will be escaped" do
9
+ img = {"src" => "http://media.giantbomb.com/uploads/0/26/10180-psn icon_middle.jpg", "width" => 280, "height" => 283}
10
+ image = MechanizeContent::Image.new(img, "http://www.giantbomb.com/news/the-slow-motion-ballet-of-death-in-max-payne-3/3721/")
11
+ image.absolute_url.should eq("http://media.giantbomb.com/uploads/0/26/10180-psn%20icon_middle.jpg")
12
+ end
13
+ end
14
+ end
15
+
6
16
  describe "#interesting_css?" do
7
17
  context "given a gif" do
8
18
  it "is not interesting" do
9
19
  img = {"src" => "http://www.cmpevents.com/GD10/ablank.gif2", "width" => 500, "height" => 500}
10
20
  image = MechanizeContent::Image.new(img, "https://www.cmpevents.com")
11
- image.should_not be_interesting_css
21
+ image.should_not be_interesting_css
12
22
  end
13
23
  end
14
-
24
+
15
25
  context "given a banner" do
16
26
  it "is not interesting" do
17
27
  img = {"src" => "http://www.cmpevents.com/GD10/banner.png", "width" => 500, "height" => 500}
18
28
  image = MechanizeContent::Image.new(img, "https://www.cmpevents.com")
19
- image.should_not be_interesting_css
29
+ image.should_not be_interesting_css
20
30
  end
21
31
  end
22
32
 
@@ -35,5 +45,5 @@ describe MechanizeContent::Image do
35
45
  image.should be_interesting_css
36
46
  end
37
47
  end
38
- end
48
+ end
39
49
  end
metadata CHANGED
@@ -1,82 +1,79 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: mechanize_content
3
- version: !ruby/object:Gem::Version
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.2
4
5
  prerelease:
5
- version: 0.3.1
6
6
  platform: ruby
7
- authors:
7
+ authors:
8
8
  - John Griffin
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
-
13
- date: 2011-07-10 00:00:00 Z
14
- dependencies:
15
- - !ruby/object:Gem::Dependency
12
+ date: 2011-10-09 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
16
15
  name: mechanize
17
- prerelease: false
18
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &70365206410400 !ruby/object:Gem::Requirement
19
17
  none: false
20
- requirements:
18
+ requirements:
21
19
  - - ~>
22
- - !ruby/object:Gem::Version
20
+ - !ruby/object:Gem::Version
23
21
  version: 2.0.1
24
22
  type: :runtime
25
- version_requirements: *id001
26
- - !ruby/object:Gem::Dependency
27
- name: imagesize
28
23
  prerelease: false
29
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: *70365206410400
25
+ - !ruby/object:Gem::Dependency
26
+ name: imagesize
27
+ requirement: &70365206409620 !ruby/object:Gem::Requirement
30
28
  none: false
31
- requirements:
29
+ requirements:
32
30
  - - ~>
33
- - !ruby/object:Gem::Version
31
+ - !ruby/object:Gem::Version
34
32
  version: 0.1.1
35
33
  type: :runtime
36
- version_requirements: *id002
37
- - !ruby/object:Gem::Dependency
38
- name: rspec
39
34
  prerelease: false
40
- requirement: &id003 !ruby/object:Gem::Requirement
35
+ version_requirements: *70365206409620
36
+ - !ruby/object:Gem::Dependency
37
+ name: rspec
38
+ requirement: &70365206408940 !ruby/object:Gem::Requirement
41
39
  none: false
42
- requirements:
40
+ requirements:
43
41
  - - ~>
44
- - !ruby/object:Gem::Version
42
+ - !ruby/object:Gem::Version
45
43
  version: 2.6.0
46
44
  type: :development
47
- version_requirements: *id003
48
- - !ruby/object:Gem::Dependency
49
- name: vcr
50
45
  prerelease: false
51
- requirement: &id004 !ruby/object:Gem::Requirement
46
+ version_requirements: *70365206408940
47
+ - !ruby/object:Gem::Dependency
48
+ name: vcr
49
+ requirement: &70365206408160 !ruby/object:Gem::Requirement
52
50
  none: false
53
- requirements:
51
+ requirements:
54
52
  - - ~>
55
- - !ruby/object:Gem::Version
53
+ - !ruby/object:Gem::Version
56
54
  version: 1.10.0
57
55
  type: :development
58
- version_requirements: *id004
59
- - !ruby/object:Gem::Dependency
60
- name: fakeweb
61
56
  prerelease: false
62
- requirement: &id005 !ruby/object:Gem::Requirement
57
+ version_requirements: *70365206408160
58
+ - !ruby/object:Gem::Dependency
59
+ name: fakeweb
60
+ requirement: &70365206407500 !ruby/object:Gem::Requirement
63
61
  none: false
64
- requirements:
62
+ requirements:
65
63
  - - ~>
66
- - !ruby/object:Gem::Version
64
+ - !ruby/object:Gem::Version
67
65
  version: 1.3.0
68
66
  type: :development
69
- version_requirements: *id005
70
- description: pass in a url or urls and mechanize-content will select the best block of text, image and title by analysing the page content
71
- email:
67
+ prerelease: false
68
+ version_requirements: *70365206407500
69
+ description: pass in a url or urls and mechanize-content will select the best block
70
+ of text, image and title by analysing the page content
71
+ email:
72
72
  - johnog@gmail.com
73
73
  executables: []
74
-
75
74
  extensions: []
76
-
77
75
  extra_rdoc_files: []
78
-
79
- files:
76
+ files:
80
77
  - .gitignore
81
78
  - .rspec
82
79
  - .rvmrc
@@ -98,32 +95,35 @@ files:
98
95
  - spec/spec_helper.rb
99
96
  homepage: http://github.com/john-griffin/mechanize-content
100
97
  licenses: []
101
-
102
98
  post_install_message:
103
99
  rdoc_options: []
104
-
105
- require_paths:
100
+ require_paths:
106
101
  - lib
107
- required_ruby_version: !ruby/object:Gem::Requirement
102
+ required_ruby_version: !ruby/object:Gem::Requirement
108
103
  none: false
109
- requirements:
110
- - - ">="
111
- - !ruby/object:Gem::Version
112
- version: "0"
113
- required_rubygems_version: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ! '>='
106
+ - !ruby/object:Gem::Version
107
+ version: '0'
108
+ segments:
109
+ - 0
110
+ hash: -160408397260513251
111
+ required_rubygems_version: !ruby/object:Gem::Requirement
114
112
  none: false
115
- requirements:
116
- - - ">="
117
- - !ruby/object:Gem::Version
118
- version: "0"
113
+ requirements:
114
+ - - ! '>='
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ segments:
118
+ - 0
119
+ hash: -160408397260513251
119
120
  requirements: []
120
-
121
121
  rubyforge_project: mechanize_content
122
- rubygems_version: 1.8.5
122
+ rubygems_version: 1.8.7
123
123
  signing_key:
124
124
  specification_version: 3
125
125
  summary: scrape the best content from a page
126
- test_files:
126
+ test_files:
127
127
  - spec/cassettes/MechanizeContent.yml
128
128
  - spec/cassettes/MechanizeContent_Image.yml
129
129
  - spec/mechanize_content/image_spec.rb