mechanize_content 0.3.2 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,7 +3,7 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
3
3
 
4
4
  describe "MechanizeContent" do
5
5
  use_vcr_cassette :record => :new_episodes
6
-
6
+
7
7
  describe "#best_title" do
8
8
  context "given a page with a title" do
9
9
  it "will be that title" do
@@ -11,7 +11,7 @@ describe "MechanizeContent" do
11
11
  mc.best_title.should eql("Techmeme")
12
12
  end
13
13
  end
14
-
14
+
15
15
  context "given the page has an incorrect class" do
16
16
  it "will be the url" do
17
17
  p = MechanizeContent::Page.new("http://techmeme.com/")
@@ -21,7 +21,7 @@ describe "MechanizeContent" do
21
21
  mc.best_title.should eql("http://techmeme.com/")
22
22
  end
23
23
  end
24
-
24
+
25
25
  context "given page has no title" do
26
26
  it "will be the url" do
27
27
  p = mock("page", :title => nil, :url => "http://techmeme.com/")
@@ -30,16 +30,16 @@ describe "MechanizeContent" do
30
30
  mc.best_title.should eql("http://techmeme.com/")
31
31
  end
32
32
  end
33
-
33
+
34
34
  context "given multiple urls are passed in" do
35
35
  it "will find the best title" do
36
36
  mc = MechanizeContent::Parser.new("http://www.facebook.com/RockBand", "http://www.vg247.com/2010/03/09/rock-band-3-out-this-holiday-will-revolutionize-genre/")
37
37
  mc.best_title.should eql("Rock Band | Facebook")
38
38
  end
39
39
  end
40
-
40
+
41
41
  end
42
-
42
+
43
43
  describe "#best_text" do
44
44
  context "page with no text" do
45
45
  it "will be nil" do
@@ -47,42 +47,42 @@ describe "MechanizeContent" do
47
47
  mc.best_text.should eql(nil)
48
48
  end
49
49
  end
50
-
50
+
51
51
  context "given a page with text" do
52
52
  it "will be the text" do
53
53
  mc2 = MechanizeContent::Parser.new("http://www.gamesetwatch.com/2010/03/gdc_2010_rounds_off_indie_cove.php")
54
54
  mc2.best_text.should eql("Game Developers Conference organizers have confirmed the final set of independent game-specific content, including Ron Carmel on the just-debuted Indie Fund, the Gamma IV party/showcase, and the EGW-replacing Nuovo Sessions game showcase.The newly confirmed details round off a multitude of independent game-specific content at the March 9th-13th event, held at the Moscone Center in San Francisco, including the 12th Annual Independent Games Festival -- featuring over 30 top indie games playable on the GDC Expo floor from Thursday 11th to Saturday 13th, as well as the major IGF Awards on Thursday 11th at 6.30pm.In addition, the 4th Independent Games Summit on Tuesday 9th and Wednesday 10th has added and clarified a number of sessions, with 2D Boy's Ron Carmel kicking off the event with 'Indies and Publishers: Fixing a System That Never Worked', now confirmed to discuss the new Indie Fund organization.Another major new panel, 'Tripping The Art Fantastic', features Spelunky creator Derek Yu, Braid artist David Hellman and Super Meat Boy co-creator Edmund McMillen discussing \"how each one of these figures influences the state of game art, from hand painted epics to short form experimental Flash games.\"")
55
55
  end
56
56
  end
57
-
57
+
58
58
  context "given multiple urls are passed in" do
59
59
  it "will find the best text" do
60
60
  mc = MechanizeContent::Parser.new("http://www.facebook.com/RockBand", "http://www.vg247.com/2010/03/09/rock-band-3-out-this-holiday-will-revolutionize-genre/")
61
61
  mc.best_text.should include("Harmonix just confirmed that Rock Band 3 will release this holiday season.Said the firm on Rock Band’s Facebook page:“Harmonix is developing Rock Band 3 for worldwide release this holiday season! The game, which will be published by MTV Games and distributed by Electronic Arts, will innovate and revolutionize the music genre once again, just as Harmonix did with the original Rock Band, Rock Band 2 and The Beatles: Rock Band. Stay tuned for more details!”There’s no more detail right now, but keep watching for updates from GDC.")
62
62
  end
63
63
  end
64
-
64
+
65
65
  context "given the first link has no text" do
66
66
  it "will pull text from the next link" do
67
67
  mc = MechanizeContent::Parser.new("http://www.gog.com/en/gamecard/another_world_15th_anniversary_edition", "http://www.destructoid.com/-nuff-said-good-old-games-gets-another-world-168150.phtml", "http://www.joystiq.com/2010/03/18/another-world-15th-anniversary-edition-now-on-gog-com/")
68
68
  mc.best_text.should eql("Éric Chahi masterpiece is looking better than ever\r\r\rPosted on 2010-03-18 09:46:36 by \r\rEclipse:\r\rThe first time I've ever played Another World was on my 486, I remember I booted up the game and wow, I was so shocked at how "cinematic" it felt, it looked and played like an animated sci-fi movie, fluid animations, always unique enemies, action puzzles... I remember it was the first game I've ever played that needed you to actually run away from the beast atread more\r the end of the first level instead of just shooting the hell out of your enemies. The world around Lester felt more alive than every game I've ever played before and Lester himself was a true human, not some sort of pumped superdude with a shotgun.Smooth animations like I was never able to see from Prince of Persia times, glorious and fascinating graphics, rapid action sequences and an hard yet satisfying gameplay.When I played this remade version for the first time I had a similar feeling, It wasn't something new, I've played this game dozen of times during years, but the sharp vector graphics i was able too see as child on my old crt monitor looked pixelly on my new 23" LCD, this restyle reminds me more than the original version how I used to look at this game, I know it may sound strange but it's true.And yay, no more passwords to remember :)This game made my childhood and it's one of the best games EVER.\r\r\r\rWas this helpful?\r\r\r(105 of 119 people found this helpful)")
69
69
  end
70
70
  end
71
-
71
+
72
72
  context "given a page with strange class names" do
73
73
  it "should still be able to find the text" do
74
74
  mc = MechanizeContent::Parser.new("http://www.destructoid.com/-nuff-said-good-old-games-gets-another-world-168150.phtml")
75
75
  mc.best_text.should eql("Another World -- or Out of this World, as many of you will know it by -- is now on DRM-free digital distribution service Good Old Games. It can be had for $9.99. Need I say more?\rI love the game, even though I have never made it more than oh, five minutes in. It's more or less universally loved by the Destructoid staff. Not long after we got an email detailing the good news, the thread soon reached fifteen or so replies full of praise for the game.\rOther, less exciting recent releases include: Empire Earth II Gold, Gabriel Knight 3, and Aquanox. Not to completely s**t on these games, but this is Another World we're talking about here.")
76
76
  end
77
77
  end
78
-
78
+
79
79
  context "given a flash marketing micro site" do
80
80
  it "will be nil" do
81
81
  mc = MechanizeContent::Parser.new("http://www.sonypictures.co.uk/movies/cloudywithachanceofmeatballs/")
82
82
  mc.best_text.should eql(nil)
83
83
  end
84
84
  end
85
-
85
+
86
86
  context "given a first page with text that is too short" do
87
87
  it "will pull text from the next suitable page" do
88
88
  mc = MechanizeContent::Parser.new("http://www.examiner.com/fps-in-national/playstation-network-up-japan-video-announcement-video",
@@ -92,7 +92,7 @@ describe "MechanizeContent" do
92
92
  end
93
93
  end
94
94
  end
95
-
95
+
96
96
  describe "#best_image" do
97
97
  context "given a page with a suitable image" do
98
98
  it "will be the image url" do
@@ -100,14 +100,14 @@ describe "MechanizeContent" do
100
100
  mc.best_image.should eql("http://media.rockstargames.com/rockstargames/img/global/news/upload/lanoire_streetcrimes_cameraobscura.jpg")
101
101
  end
102
102
  end
103
-
103
+
104
104
  context "given a page with no suitable images" do
105
105
  it "will be nil" do
106
106
  mc3 = MechanizeContent::Parser.new("http://www.gog.com/en/gamecard/another_world_15th_anniversary_edition")
107
107
  mc3.best_image.should eql(nil)
108
108
  end
109
109
  end
110
-
110
+
111
111
  context "given multiple urls are passed in" do
112
112
  it "will find the best image" do
113
113
  mc = MechanizeContent::Parser.new("http://www.facebook.com/RockBand", "http://www.vg247.com/2010/03/09/rock-band-3-out-this-holiday-will-revolutionize-genre/")
@@ -117,12 +117,12 @@ describe "MechanizeContent" do
117
117
 
118
118
  context "given a url with strange utf8 encoding" do
119
119
  it "will find the best image" do
120
- mc = MechanizeContent::Parser.new(%w(http://www.computerandvideogames.com/310643/news/lg-dedicated-gaming-handhelds-are-over/
120
+ mc = MechanizeContent::Parser.new(%w(http://www.computerandvideogames.com/310643/news/lg-dedicated-gaming-handhelds-are-over/
121
121
  http://www.destructoid.com/lg-age-of-dedicated-gaming-handhelds-is-over--205485.phtml))
122
122
  mc.best_image.should eql("http://cdn.medialib.computerandvideogames.com/screens/screenshot_259495_thumb_wide300.jpg")
123
123
  end
124
124
  end
125
-
125
+
126
126
  context "given the first pick does not support hot linking" do
127
127
  it "will fall back to the next image" do
128
128
  mc = MechanizeContent::Parser.new("http://www.andriasang.com/e/blog/2011/05/27/kojima_questions/",
@@ -134,7 +134,30 @@ describe "MechanizeContent" do
134
134
  end
135
135
  end
136
136
  end
137
-
137
+
138
+ describe "#best_image_iphone" do
139
+ context "given a page with an iphone fav icon" do
140
+ it "will be the image url" do
141
+ mc = MechanizeContent::Parser.new("http://www.officialplaystationmagazine.co.uk/2011/11/16/historic-issue-65-cover-features-hideo-kojima/")
142
+ mc.best_image_iphone.should eql("http://static.officialplaystationmagazine.co.uk/wp-content/themes/opm/style/images/icon/apple-touch-icon.png")
143
+ end
144
+ end
145
+
146
+ context "given a page with an iphone fav icon" do
147
+ it "will be the image url" do
148
+ mc = MechanizeContent::Parser.new("http://www.eurogamer.net/articles/2011-11-17-oddword-strangers-wrath-hd-ps3-complete")
149
+ mc.best_image_iphone.should eql("http://www.eurogamer.net/img/EurogamerPage/apple-touch-icon.png")
150
+ end
151
+ end
152
+
153
+ context "given a page with no iphone icon" do
154
+ it "will return the normal image url" do
155
+ mc = MechanizeContent::Parser.new("http://www.rockstargames.com/newswire/article/16021/street-crimes-of-la-noire-new-screenshots-info.html")
156
+ mc.best_image_iphone.should eql("http://media.rockstargames.com/rockstargames/img/global/news/upload/lanoire_streetcrimes_cameraobscura.jpg")
157
+ end
158
+ end
159
+ end
160
+
138
161
  describe ".initialize" do
139
162
  context "given an array of urls are passed in" do
140
163
  it "will still parse content" do
@@ -144,7 +167,7 @@ describe "MechanizeContent" do
144
167
  mc.best_image.should eql("http://media.rockstargames.com/rockstargames/img/global/news/upload/lanoire_streetcrimes_cameraobscura.jpg")
145
168
  end
146
169
  end
147
-
170
+
148
171
  context "given urls are passed in as seperate arguements" do
149
172
  it "will parse arguements" do
150
173
  mc = MechanizeContent::Parser.new("http://www.rockstargames.com/newswire/article/16021/street-crimes-of-la-noire-new-screenshots-info.html", "http://www.vg247.com/2010/03/18/gta-iv-episodes-from-liberty-city-sees-slight-delay-on-pc-and-ps3/")
@@ -153,5 +176,5 @@ describe "MechanizeContent" do
153
176
  mc.best_image.should eql("http://media.rockstargames.com/rockstargames/img/global/news/upload/lanoire_streetcrimes_cameraobscura.jpg")
154
177
  end
155
178
  end
156
- end
179
+ end
157
180
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mechanize_content
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-10-09 00:00:00.000000000Z
12
+ date: 2011-11-17 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mechanize
16
- requirement: &70365206410400 !ruby/object:Gem::Requirement
16
+ requirement: &70212194376380 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ~>
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: 2.0.1
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70365206410400
24
+ version_requirements: *70212194376380
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: imagesize
27
- requirement: &70365206409620 !ruby/object:Gem::Requirement
27
+ requirement: &70212194375580 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ~>
@@ -32,10 +32,10 @@ dependencies:
32
32
  version: 0.1.1
33
33
  type: :runtime
34
34
  prerelease: false
35
- version_requirements: *70365206409620
35
+ version_requirements: *70212194375580
36
36
  - !ruby/object:Gem::Dependency
37
37
  name: rspec
38
- requirement: &70365206408940 !ruby/object:Gem::Requirement
38
+ requirement: &70212194236180 !ruby/object:Gem::Requirement
39
39
  none: false
40
40
  requirements:
41
41
  - - ~>
@@ -43,21 +43,21 @@ dependencies:
43
43
  version: 2.6.0
44
44
  type: :development
45
45
  prerelease: false
46
- version_requirements: *70365206408940
46
+ version_requirements: *70212194236180
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: vcr
49
- requirement: &70365206408160 !ruby/object:Gem::Requirement
49
+ requirement: &70212194235100 !ruby/object:Gem::Requirement
50
50
  none: false
51
51
  requirements:
52
52
  - - ~>
53
53
  - !ruby/object:Gem::Version
54
- version: 1.10.0
54
+ version: 1.11.3
55
55
  type: :development
56
56
  prerelease: false
57
- version_requirements: *70365206408160
57
+ version_requirements: *70212194235100
58
58
  - !ruby/object:Gem::Dependency
59
59
  name: fakeweb
60
- requirement: &70365206407500 !ruby/object:Gem::Requirement
60
+ requirement: &70212194233980 !ruby/object:Gem::Requirement
61
61
  none: false
62
62
  requirements:
63
63
  - - ~>
@@ -65,7 +65,7 @@ dependencies:
65
65
  version: 1.3.0
66
66
  type: :development
67
67
  prerelease: false
68
- version_requirements: *70365206407500
68
+ version_requirements: *70212194233980
69
69
  description: pass in a url or urls and mechanize-content will select the best block
70
70
  of text, image and title by analysing the page content
71
71
  email:
@@ -105,21 +105,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
105
105
  - - ! '>='
106
106
  - !ruby/object:Gem::Version
107
107
  version: '0'
108
- segments:
109
- - 0
110
- hash: -160408397260513251
111
108
  required_rubygems_version: !ruby/object:Gem::Requirement
112
109
  none: false
113
110
  requirements:
114
111
  - - ! '>='
115
112
  - !ruby/object:Gem::Version
116
113
  version: '0'
117
- segments:
118
- - 0
119
- hash: -160408397260513251
120
114
  requirements: []
121
115
  rubyforge_project: mechanize_content
122
- rubygems_version: 1.8.7
116
+ rubygems_version: 1.8.11
123
117
  signing_key:
124
118
  specification_version: 3
125
119
  summary: scrape the best content from a page