mechanize_content 0.3.2 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/mechanize_content.rb +5 -1
- data/lib/mechanize_content/page.rb +24 -13
- data/lib/mechanize_content/version.rb +1 -1
- data/mechanize_content.gemspec +1 -1
- data/spec/cassettes/MechanizeContent.yml +12902 -5713
- data/spec/mechanize_content_spec.rb +43 -20
- metadata +14 -20
@@ -3,7 +3,7 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
|
3
3
|
|
4
4
|
describe "MechanizeContent" do
|
5
5
|
use_vcr_cassette :record => :new_episodes
|
6
|
-
|
6
|
+
|
7
7
|
describe "#best_title" do
|
8
8
|
context "given a page with a title" do
|
9
9
|
it "will be that title" do
|
@@ -11,7 +11,7 @@ describe "MechanizeContent" do
|
|
11
11
|
mc.best_title.should eql("Techmeme")
|
12
12
|
end
|
13
13
|
end
|
14
|
-
|
14
|
+
|
15
15
|
context "given the page has an incorrect class" do
|
16
16
|
it "will be the url" do
|
17
17
|
p = MechanizeContent::Page.new("http://techmeme.com/")
|
@@ -21,7 +21,7 @@ describe "MechanizeContent" do
|
|
21
21
|
mc.best_title.should eql("http://techmeme.com/")
|
22
22
|
end
|
23
23
|
end
|
24
|
-
|
24
|
+
|
25
25
|
context "given page has no title" do
|
26
26
|
it "will be the url" do
|
27
27
|
p = mock("page", :title => nil, :url => "http://techmeme.com/")
|
@@ -30,16 +30,16 @@ describe "MechanizeContent" do
|
|
30
30
|
mc.best_title.should eql("http://techmeme.com/")
|
31
31
|
end
|
32
32
|
end
|
33
|
-
|
33
|
+
|
34
34
|
context "given multiple urls are passed in" do
|
35
35
|
it "will find the best title" do
|
36
36
|
mc = MechanizeContent::Parser.new("http://www.facebook.com/RockBand", "http://www.vg247.com/2010/03/09/rock-band-3-out-this-holiday-will-revolutionize-genre/")
|
37
37
|
mc.best_title.should eql("Rock Band | Facebook")
|
38
38
|
end
|
39
39
|
end
|
40
|
-
|
40
|
+
|
41
41
|
end
|
42
|
-
|
42
|
+
|
43
43
|
describe "#best_text" do
|
44
44
|
context "page with no text" do
|
45
45
|
it "will be nil" do
|
@@ -47,42 +47,42 @@ describe "MechanizeContent" do
|
|
47
47
|
mc.best_text.should eql(nil)
|
48
48
|
end
|
49
49
|
end
|
50
|
-
|
50
|
+
|
51
51
|
context "given a page with text" do
|
52
52
|
it "will be the text" do
|
53
53
|
mc2 = MechanizeContent::Parser.new("http://www.gamesetwatch.com/2010/03/gdc_2010_rounds_off_indie_cove.php")
|
54
54
|
mc2.best_text.should eql("Game Developers Conference organizers have confirmed the final set of independent game-specific content, including Ron Carmel on the just-debuted Indie Fund, the Gamma IV party/showcase, and the EGW-replacing Nuovo Sessions game showcase.The newly confirmed details round off a multitude of independent game-specific content at the March 9th-13th event, held at the Moscone Center in San Francisco, including the 12th Annual Independent Games Festival -- featuring over 30 top indie games playable on the GDC Expo floor from Thursday 11th to Saturday 13th, as well as the major IGF Awards on Thursday 11th at 6.30pm.In addition, the 4th Independent Games Summit on Tuesday 9th and Wednesday 10th has added and clarified a number of sessions, with 2D Boy's Ron Carmel kicking off the event with 'Indies and Publishers: Fixing a System That Never Worked', now confirmed to discuss the new Indie Fund organization.Another major new panel, 'Tripping The Art Fantastic', features Spelunky creator Derek Yu, Braid artist David Hellman and Super Meat Boy co-creator Edmund McMillen discussing \"how each one of these figures influences the state of game art, from hand painted epics to short form experimental Flash games.\"")
|
55
55
|
end
|
56
56
|
end
|
57
|
-
|
57
|
+
|
58
58
|
context "given multiple urls are passed in" do
|
59
59
|
it "will find the best text" do
|
60
60
|
mc = MechanizeContent::Parser.new("http://www.facebook.com/RockBand", "http://www.vg247.com/2010/03/09/rock-band-3-out-this-holiday-will-revolutionize-genre/")
|
61
61
|
mc.best_text.should include("Harmonix just confirmed that Rock Band 3 will release this holiday season.Said the firm on Rock Band’s Facebook page:“Harmonix is developing Rock Band 3 for worldwide release this holiday season! The game, which will be published by MTV Games and distributed by Electronic Arts, will innovate and revolutionize the music genre once again, just as Harmonix did with the original Rock Band, Rock Band 2 and The Beatles: Rock Band. Stay tuned for more details!”There’s no more detail right now, but keep watching for updates from GDC.")
|
62
62
|
end
|
63
63
|
end
|
64
|
-
|
64
|
+
|
65
65
|
context "given the first link has no text" do
|
66
66
|
it "will pull text from the next link" do
|
67
67
|
mc = MechanizeContent::Parser.new("http://www.gog.com/en/gamecard/another_world_15th_anniversary_edition", "http://www.destructoid.com/-nuff-said-good-old-games-gets-another-world-168150.phtml", "http://www.joystiq.com/2010/03/18/another-world-15th-anniversary-edition-now-on-gog-com/")
|
68
68
|
mc.best_text.should eql("Éric Chahi masterpiece is looking better than ever\r\r\rPosted on 2010-03-18 09:46:36 by \r\rEclipse:\r\rThe first time I've ever played Another World was on my 486, I remember I booted up the game and wow, I was so shocked at how "cinematic" it felt, it looked and played like an animated sci-fi movie, fluid animations, always unique enemies, action puzzles... I remember it was the first game I've ever played that needed you to actually run away from the beast atread more\r the end of the first level instead of just shooting the hell out of your enemies. The world around Lester felt more alive than every game I've ever played before and Lester himself was a true human, not some sort of pumped superdude with a shotgun.Smooth animations like I was never able to see from Prince of Persia times, glorious and fascinating graphics, rapid action sequences and an hard yet satisfying gameplay.When I played this remade version for the first time I had a similar feeling, It wasn't something new, I've played this game dozen of times during years, but the sharp vector graphics i was able too see as child on my old crt monitor looked pixelly on my new 23" LCD, this restyle reminds me more than the original version how I used to look at this game, I know it may sound strange but it's true.And yay, no more passwords to remember :)This game made my childhood and it's one of the best games EVER.\r\r\r\rWas this helpful?\r\r\r(105 of 119 people found this helpful)")
|
69
69
|
end
|
70
70
|
end
|
71
|
-
|
71
|
+
|
72
72
|
context "given a page with strange class names" do
|
73
73
|
it "should still be able to find the text" do
|
74
74
|
mc = MechanizeContent::Parser.new("http://www.destructoid.com/-nuff-said-good-old-games-gets-another-world-168150.phtml")
|
75
75
|
mc.best_text.should eql("Another World -- or Out of this World, as many of you will know it by -- is now on DRM-free digital distribution service Good Old Games. It can be had for $9.99. Need I say more?\rI love the game, even though I have never made it more than oh, five minutes in. It's more or less universally loved by the Destructoid staff. Not long after we got an email detailing the good news, the thread soon reached fifteen or so replies full of praise for the game.\rOther, less exciting recent releases include: Empire Earth II Gold, Gabriel Knight 3, and Aquanox. Not to completely s**t on these games, but this is Another World we're talking about here.")
|
76
76
|
end
|
77
77
|
end
|
78
|
-
|
78
|
+
|
79
79
|
context "given a flash marketing micro site" do
|
80
80
|
it "will be nil" do
|
81
81
|
mc = MechanizeContent::Parser.new("http://www.sonypictures.co.uk/movies/cloudywithachanceofmeatballs/")
|
82
82
|
mc.best_text.should eql(nil)
|
83
83
|
end
|
84
84
|
end
|
85
|
-
|
85
|
+
|
86
86
|
context "given a first page with text that is too short" do
|
87
87
|
it "will pull text from the next suitable page" do
|
88
88
|
mc = MechanizeContent::Parser.new("http://www.examiner.com/fps-in-national/playstation-network-up-japan-video-announcement-video",
|
@@ -92,7 +92,7 @@ describe "MechanizeContent" do
|
|
92
92
|
end
|
93
93
|
end
|
94
94
|
end
|
95
|
-
|
95
|
+
|
96
96
|
describe "#best_image" do
|
97
97
|
context "given a page with a suitable image" do
|
98
98
|
it "will be the image url" do
|
@@ -100,14 +100,14 @@ describe "MechanizeContent" do
|
|
100
100
|
mc.best_image.should eql("http://media.rockstargames.com/rockstargames/img/global/news/upload/lanoire_streetcrimes_cameraobscura.jpg")
|
101
101
|
end
|
102
102
|
end
|
103
|
-
|
103
|
+
|
104
104
|
context "given a page with no suitable images" do
|
105
105
|
it "will be nil" do
|
106
106
|
mc3 = MechanizeContent::Parser.new("http://www.gog.com/en/gamecard/another_world_15th_anniversary_edition")
|
107
107
|
mc3.best_image.should eql(nil)
|
108
108
|
end
|
109
109
|
end
|
110
|
-
|
110
|
+
|
111
111
|
context "given multiple urls are passed in" do
|
112
112
|
it "will find the best image" do
|
113
113
|
mc = MechanizeContent::Parser.new("http://www.facebook.com/RockBand", "http://www.vg247.com/2010/03/09/rock-band-3-out-this-holiday-will-revolutionize-genre/")
|
@@ -117,12 +117,12 @@ describe "MechanizeContent" do
|
|
117
117
|
|
118
118
|
context "given a url with strange utf8 encoding" do
|
119
119
|
it "will find the best image" do
|
120
|
-
mc = MechanizeContent::Parser.new(%w(http://www.computerandvideogames.com/310643/news/lg-dedicated-gaming-handhelds-are-over/
|
120
|
+
mc = MechanizeContent::Parser.new(%w(http://www.computerandvideogames.com/310643/news/lg-dedicated-gaming-handhelds-are-over/
|
121
121
|
http://www.destructoid.com/lg-age-of-dedicated-gaming-handhelds-is-over--205485.phtml))
|
122
122
|
mc.best_image.should eql("http://cdn.medialib.computerandvideogames.com/screens/screenshot_259495_thumb_wide300.jpg")
|
123
123
|
end
|
124
124
|
end
|
125
|
-
|
125
|
+
|
126
126
|
context "given the first pick does not support hot linking" do
|
127
127
|
it "will fall back to the next image" do
|
128
128
|
mc = MechanizeContent::Parser.new("http://www.andriasang.com/e/blog/2011/05/27/kojima_questions/",
|
@@ -134,7 +134,30 @@ describe "MechanizeContent" do
|
|
134
134
|
end
|
135
135
|
end
|
136
136
|
end
|
137
|
-
|
137
|
+
|
138
|
+
describe "#best_image_iphone" do
|
139
|
+
context "given a page with an iphone fav icon" do
|
140
|
+
it "will be the image url" do
|
141
|
+
mc = MechanizeContent::Parser.new("http://www.officialplaystationmagazine.co.uk/2011/11/16/historic-issue-65-cover-features-hideo-kojima/")
|
142
|
+
mc.best_image_iphone.should eql("http://static.officialplaystationmagazine.co.uk/wp-content/themes/opm/style/images/icon/apple-touch-icon.png")
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
context "given a page with an iphone fav icon" do
|
147
|
+
it "will be the image url" do
|
148
|
+
mc = MechanizeContent::Parser.new("http://www.eurogamer.net/articles/2011-11-17-oddword-strangers-wrath-hd-ps3-complete")
|
149
|
+
mc.best_image_iphone.should eql("http://www.eurogamer.net/img/EurogamerPage/apple-touch-icon.png")
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
context "given a page with no iphone icon" do
|
154
|
+
it "will return the normal image url" do
|
155
|
+
mc = MechanizeContent::Parser.new("http://www.rockstargames.com/newswire/article/16021/street-crimes-of-la-noire-new-screenshots-info.html")
|
156
|
+
mc.best_image_iphone.should eql("http://media.rockstargames.com/rockstargames/img/global/news/upload/lanoire_streetcrimes_cameraobscura.jpg")
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
|
138
161
|
describe ".initialize" do
|
139
162
|
context "given an array of urls are passed in" do
|
140
163
|
it "will still parse content" do
|
@@ -144,7 +167,7 @@ describe "MechanizeContent" do
|
|
144
167
|
mc.best_image.should eql("http://media.rockstargames.com/rockstargames/img/global/news/upload/lanoire_streetcrimes_cameraobscura.jpg")
|
145
168
|
end
|
146
169
|
end
|
147
|
-
|
170
|
+
|
148
171
|
context "given urls are passed in as seperate arguements" do
|
149
172
|
it "will parse arguements" do
|
150
173
|
mc = MechanizeContent::Parser.new("http://www.rockstargames.com/newswire/article/16021/street-crimes-of-la-noire-new-screenshots-info.html", "http://www.vg247.com/2010/03/18/gta-iv-episodes-from-liberty-city-sees-slight-delay-on-pc-and-ps3/")
|
@@ -153,5 +176,5 @@ describe "MechanizeContent" do
|
|
153
176
|
mc.best_image.should eql("http://media.rockstargames.com/rockstargames/img/global/news/upload/lanoire_streetcrimes_cameraobscura.jpg")
|
154
177
|
end
|
155
178
|
end
|
156
|
-
end
|
179
|
+
end
|
157
180
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mechanize_content
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.2
|
4
|
+
version: 0.4.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-
|
12
|
+
date: 2011-11-17 00:00:00.000000000Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: mechanize
|
16
|
-
requirement: &
|
16
|
+
requirement: &70212194376380 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ~>
|
@@ -21,10 +21,10 @@ dependencies:
|
|
21
21
|
version: 2.0.1
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70212194376380
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: imagesize
|
27
|
-
requirement: &
|
27
|
+
requirement: &70212194375580 !ruby/object:Gem::Requirement
|
28
28
|
none: false
|
29
29
|
requirements:
|
30
30
|
- - ~>
|
@@ -32,10 +32,10 @@ dependencies:
|
|
32
32
|
version: 0.1.1
|
33
33
|
type: :runtime
|
34
34
|
prerelease: false
|
35
|
-
version_requirements: *
|
35
|
+
version_requirements: *70212194375580
|
36
36
|
- !ruby/object:Gem::Dependency
|
37
37
|
name: rspec
|
38
|
-
requirement: &
|
38
|
+
requirement: &70212194236180 !ruby/object:Gem::Requirement
|
39
39
|
none: false
|
40
40
|
requirements:
|
41
41
|
- - ~>
|
@@ -43,21 +43,21 @@ dependencies:
|
|
43
43
|
version: 2.6.0
|
44
44
|
type: :development
|
45
45
|
prerelease: false
|
46
|
-
version_requirements: *
|
46
|
+
version_requirements: *70212194236180
|
47
47
|
- !ruby/object:Gem::Dependency
|
48
48
|
name: vcr
|
49
|
-
requirement: &
|
49
|
+
requirement: &70212194235100 !ruby/object:Gem::Requirement
|
50
50
|
none: false
|
51
51
|
requirements:
|
52
52
|
- - ~>
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 1.
|
54
|
+
version: 1.11.3
|
55
55
|
type: :development
|
56
56
|
prerelease: false
|
57
|
-
version_requirements: *
|
57
|
+
version_requirements: *70212194235100
|
58
58
|
- !ruby/object:Gem::Dependency
|
59
59
|
name: fakeweb
|
60
|
-
requirement: &
|
60
|
+
requirement: &70212194233980 !ruby/object:Gem::Requirement
|
61
61
|
none: false
|
62
62
|
requirements:
|
63
63
|
- - ~>
|
@@ -65,7 +65,7 @@ dependencies:
|
|
65
65
|
version: 1.3.0
|
66
66
|
type: :development
|
67
67
|
prerelease: false
|
68
|
-
version_requirements: *
|
68
|
+
version_requirements: *70212194233980
|
69
69
|
description: pass in a url or urls and mechanize-content will select the best block
|
70
70
|
of text, image and title by analysing the page content
|
71
71
|
email:
|
@@ -105,21 +105,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
105
105
|
- - ! '>='
|
106
106
|
- !ruby/object:Gem::Version
|
107
107
|
version: '0'
|
108
|
-
segments:
|
109
|
-
- 0
|
110
|
-
hash: -160408397260513251
|
111
108
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
112
109
|
none: false
|
113
110
|
requirements:
|
114
111
|
- - ! '>='
|
115
112
|
- !ruby/object:Gem::Version
|
116
113
|
version: '0'
|
117
|
-
segments:
|
118
|
-
- 0
|
119
|
-
hash: -160408397260513251
|
120
114
|
requirements: []
|
121
115
|
rubyforge_project: mechanize_content
|
122
|
-
rubygems_version: 1.8.
|
116
|
+
rubygems_version: 1.8.11
|
123
117
|
signing_key:
|
124
118
|
specification_version: 3
|
125
119
|
summary: scrape the best content from a page
|