cyx-scraper 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/scraper/article.rb +1 -1
- data/lib/scraper/modules/video.rb +58 -0
- data/lib/scraper/modules/web.rb +9 -0
- data/lib/scraper/modules.rb +6 -0
- data/lib/scraper/vimeo.rb +70 -0
- data/lib/scraper/youtube.rb +26 -36
- data/lib/scraper.rb +6 -1
- data/scraper.gemspec +9 -1
- data/test/fixtures/5826468.html +1260 -0
- data/test/fixtures/dLO2s7SDHJo.html +2781 -0
- data/test/scraper_test.rb +8 -0
- data/test/test_helper.rb +1 -0
- data/test/vimeo_test.rb +99 -0
- data/test/youtube_test.rb +23 -0
- metadata +9 -1
data/test/scraper_test.rb
CHANGED
|
@@ -37,6 +37,10 @@ class ScraperTest < Test::Unit::TestCase
|
|
|
37
37
|
|
|
38
38
|
context "when extracting the actual content using the URL" do
|
|
39
39
|
setup do
|
|
40
|
+
Scraper::Modules::Web.expects(:open).returns(
|
|
41
|
+
File.open(@@fixture_path + '/unwebbable.html', 'r')
|
|
42
|
+
)
|
|
43
|
+
|
|
40
44
|
@url = "http://www.alistapart.com/articles/unwebbable/"
|
|
41
45
|
@scraper1 = Scraper::Article.new(:content => @article)
|
|
42
46
|
@scraper2 = Scraper::Article.new(:url => @url)
|
|
@@ -60,6 +64,10 @@ class ScraperTest < Test::Unit::TestCase
|
|
|
60
64
|
|
|
61
65
|
context "Scraper( <alist apart url> )" do
|
|
62
66
|
setup do
|
|
67
|
+
Scraper::Modules::Web.expects(:open).returns(
|
|
68
|
+
File.open(@@fixture_path + '/unwebbable.html', 'r')
|
|
69
|
+
)
|
|
70
|
+
|
|
63
71
|
@url = "http://www.alistapart.com/articles/unwebbable/"
|
|
64
72
|
end
|
|
65
73
|
|
data/test/test_helper.rb
CHANGED
data/test/vimeo_test.rb
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
require 'test_helper'
|
|
2
|
+
require 'hpricot'
|
|
3
|
+
|
|
4
|
+
class Scraper::VimeoTest < Test::Unit::TestCase
|
|
5
|
+
def stub_open_uri!
|
|
6
|
+
Scraper::Modules::Web.expects(:open).returns(
|
|
7
|
+
File.open(@@fixture_path + '/5826468.html', 'r')
|
|
8
|
+
)
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
context "given the canonical URL http://vimeo.com/5826468" do
|
|
13
|
+
setup do
|
|
14
|
+
@url = "http://vimeo.com/5826468"
|
|
15
|
+
@vimeo = Scraper::Vimeo.new( :url => @url )
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
should "not raise an error" do
|
|
19
|
+
assert_nothing_raised do
|
|
20
|
+
Scraper::Vimeo.new( :url => @url )
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
should "have a video_id 5826468" do
|
|
25
|
+
assert_equal '5826468', @vimeo.video_id
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
should "have a video title 'Sunlight Heaven'" do
|
|
29
|
+
stub_open_uri!
|
|
30
|
+
assert_equal 'Sunlight Heaven', @vimeo.title
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
should "have a video description 'Sunrise is one of the greatest...'" do
|
|
34
|
+
stub_open_uri!
|
|
35
|
+
|
|
36
|
+
@desc = "Sunrise is one of the greatest things in life. it’s a pity that i don’t see it very often. Here i tried to catch the mood of the morning sun on the way back home to Sajkod from Balatonsound festival."
|
|
37
|
+
@desc << " \nshot in Hungary @ lake Balaton, mainly on the ferry from Szántód to Tihany."
|
|
38
|
+
@desc << " \nthe music is Sunlight, Heaven from Julianna Barwick"
|
|
39
|
+
@desc << " \ni used \ncanon hv30 \nDIY 35mm adapter (static) with nikon lens (50mm) 1.4"
|
|
40
|
+
|
|
41
|
+
assert_equal @desc, @vimeo.description
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
context "embed html width 1024x768 dimensions" do
|
|
45
|
+
setup do
|
|
46
|
+
@html = @vimeo.html( :width => 1024, :height => 768 )
|
|
47
|
+
@doc = Hpricot(@html)
|
|
48
|
+
@object = @doc.search('object').first
|
|
49
|
+
@embed = @doc.search('object > embed').first
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
should "have an object tag with 1024 width" do
|
|
53
|
+
assert_equal '1024', @object.attributes['width']
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
should "have an object tag with 768 height" do
|
|
57
|
+
assert_equal '768', @object.attributes['height']
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
should "have a param tag with it's video url in it" do
|
|
61
|
+
movie = @object.search('param').detect { |p|
|
|
62
|
+
p.attributes['name'] == 'movie'
|
|
63
|
+
}
|
|
64
|
+
assert_match(/5826468/, movie.attributes['value'])
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
should "have an embed tag with 1024 width" do
|
|
68
|
+
assert_equal '1024', @embed.attributes['width']
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
should "have an embed tag with 768 height" do
|
|
72
|
+
assert_equal '768', @embed.attributes['height']
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
should "have the correct thumbnail" do
|
|
76
|
+
stub_open_uri!
|
|
77
|
+
|
|
78
|
+
@expected = "http://ts.vimeo.com.s3.amazonaws.com"
|
|
79
|
+
@expected << "/204/207/20420769_100.jpg"
|
|
80
|
+
|
|
81
|
+
assert_equal @expected, @vimeo.thumbnail
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
context "a url like http://vimeo.com/5826468?ref=blablabla" do
|
|
88
|
+
setup do
|
|
89
|
+
@scraper = Scraper::Vimeo.new(:url =>
|
|
90
|
+
"http://vimeo.com/5826468?ref=blablabla"
|
|
91
|
+
)
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
should "have a video_id 5826468" do
|
|
95
|
+
assert_equal '5826468', @scraper.video_id
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
end
|
data/test/youtube_test.rb
CHANGED
|
@@ -2,6 +2,12 @@ require 'test_helper'
|
|
|
2
2
|
require 'hpricot'
|
|
3
3
|
|
|
4
4
|
class Scraper::YoutubeTest < Test::Unit::TestCase
|
|
5
|
+
def stub_open_uri!
|
|
6
|
+
Scraper::Modules::Web.expects(:open).returns(
|
|
7
|
+
File.open(@@fixture_path + '/dLO2s7SDHJo.html', 'r')
|
|
8
|
+
)
|
|
9
|
+
end
|
|
10
|
+
|
|
5
11
|
context "given http://www.youtube.com/watch?v=dLO2s7SDHJo&feature=rec-HM-r2" do
|
|
6
12
|
setup do
|
|
7
13
|
@youtube = Scraper::Youtube.new(
|
|
@@ -12,6 +18,23 @@ class Scraper::YoutubeTest < Test::Unit::TestCase
|
|
|
12
18
|
should "have a video_id dLO2s7SDHJo" do
|
|
13
19
|
assert_equal "dLO2s7SDHJo", @youtube.video_id
|
|
14
20
|
end
|
|
21
|
+
|
|
22
|
+
should "have a title 'How to Hack Flash Games'" do
|
|
23
|
+
stub_open_uri!
|
|
24
|
+
|
|
25
|
+
assert_equal 'How to Hack Flash Games', @youtube.title
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
should "have a description Cheat Engine: ..." do
|
|
29
|
+
stub_open_uri!
|
|
30
|
+
|
|
31
|
+
@desc = "Cheat Engine: http://cheatengine.org/downloads.php"
|
|
32
|
+
@desc << " Music by: Reasoner http://reasoner.newgrounds.com"
|
|
33
|
+
@desc << " Twitter: http://www.twitter.com/householdhacker"
|
|
34
|
+
@desc << " We are going to show you how to hack any flash game using Cheat engine. You can hack games like bloons, desktop tower defense and many others."
|
|
35
|
+
|
|
36
|
+
assert_equal @desc, @youtube.description
|
|
37
|
+
end
|
|
15
38
|
end
|
|
16
39
|
|
|
17
40
|
context "given http://www.youtube.com/watch?feature=rec-HM-r2&v=dLO2s7SDHJo" do
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: cyx-scraper
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Cyril David
|
|
@@ -31,13 +31,20 @@ files:
|
|
|
31
31
|
- VERSION
|
|
32
32
|
- lib/scraper.rb
|
|
33
33
|
- lib/scraper/article.rb
|
|
34
|
+
- lib/scraper/modules.rb
|
|
35
|
+
- lib/scraper/modules/video.rb
|
|
36
|
+
- lib/scraper/modules/web.rb
|
|
37
|
+
- lib/scraper/vimeo.rb
|
|
34
38
|
- lib/scraper/youtube.rb
|
|
35
39
|
- scraper.gemspec
|
|
36
40
|
- test/article_test.rb
|
|
41
|
+
- test/fixtures/5826468.html
|
|
42
|
+
- test/fixtures/dLO2s7SDHJo.html
|
|
37
43
|
- test/fixtures/scraped.html
|
|
38
44
|
- test/fixtures/unwebbable.html
|
|
39
45
|
- test/scraper_test.rb
|
|
40
46
|
- test/test_helper.rb
|
|
47
|
+
- test/vimeo_test.rb
|
|
41
48
|
- test/youtube_test.rb
|
|
42
49
|
has_rdoc: false
|
|
43
50
|
homepage: http://github.com/cyx/scraper
|
|
@@ -70,4 +77,5 @@ test_files:
|
|
|
70
77
|
- test/article_test.rb
|
|
71
78
|
- test/scraper_test.rb
|
|
72
79
|
- test/test_helper.rb
|
|
80
|
+
- test/vimeo_test.rb
|
|
73
81
|
- test/youtube_test.rb
|