youtubesearchresultscraper 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/youtube/searchresultscraper.rb +64 -5
- data/test/html/dataN_noMsgN.htm +387 -0
- data/test/html/dataY_noMsgY.htm +1507 -0
- data/test/html/scraping_error.html +1503 -0
- data/test/youtube_scraper_test.rb +89 -0
- metadata +7 -2
@@ -0,0 +1,89 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require "runit/testcase"
|
4
|
+
require "runit/cui/testrunner"
|
5
|
+
|
6
|
+
require "rubygems"
|
7
|
+
require 'hpricot'
|
8
|
+
require "youtube/searchresultscraper"
|
9
|
+
|
10
|
+
# Tests for the YouTube search-result scraper, driven through the
# MySearchResultScraper test subclass.
#
# Two kinds of input are used:
# * live search pages fetched from www.youtube.com (network required), and
# * saved HTML fixtures under the html/ directory next to this file, each of
#   which is expected to make the scraper raise RuntimeError.
#
# NOTE(review): the original Japanese comments were mis-encoded in the copy
# this was reviewed from; the English comments below are reconstructed from
# the readable fragments and the fixture file names -- confirm against the
# original source.
class SearchResultScraperTest < RUNIT::TestCase
  # Search URL prefix handed to the scraper for the live-site tests.
  SEARCH_URL = "http://www.youtube.com/results?search_query="

  # Directory that fixture paths are resolved against, so the tests do not
  # depend on the current working directory.
  FIXTURE_BASE_DIR = File.dirname(__FILE__)

  # Scrapes live result pages (must not raise) and broken fixtures (must
  # raise RuntimeError).
  def test_scrape
    #
    # Normal cases
    #
    # A query that has search results.
    open_and_scrape(SEARCH_URL, "doraemon", 2)
    # A query that has no search results.
    open_and_scrape(SEARCH_URL, "aeudyr jahafudfhadf ahf", 2)

    #
    # Error cases
    #
    # Search results are present, yet the "Not Found" message is there too.
    assert_scrape_raises_runtime_error("html/dataY_noMsgY.htm")
    # No search results, and no "Not Found" message either.
    assert_scrape_raises_runtime_error("html/dataN_noMsgN.htm")
    # Presumably: some item of a result could not be extracted.
    assert_scrape_raises_runtime_error("html/scraping_error.html")
  end

  # Checks video_count for a query with hits and for one without.
  def test_scrape_video_count
    scraper = open_and_scrape(SEARCH_URL, "doraemon", 2)
    assert(scraper.video_count > 0)

    scraper = open_and_scrape(SEARCH_URL, "doraemonifdadfa", 2)
    assert(scraper.video_count == 0)
  end

  # Builds a scraper for +url+ / +keyword+ / +page+, calls +open+ and
  # +scrape+ on it, and returns the scraper so callers can inspect it.
  def open_and_scrape(url, keyword = nil, page = nil)
    scraper = MySearchResultScraper.new(url, keyword, page)
    scraper.open
    scraper.scrape
    scraper
  end

  # Builds a scraper for the local HTML file +url+, loads it with
  # +open_local_file+ and returns the result of +scrape+.
  #
  # A relative +url+ is resolved against the directory of this test file;
  # an absolute path is used unchanged.
  def open_local_file_and_scrape(url)
    scraper = MySearchResultScraper.new(File.expand_path(url, FIXTURE_BASE_DIR))
    scraper.open_local_file
    scraper.scrape
  end

  private

  # Fails the test unless scraping the fixture at +fixture_path+ raises
  # RuntimeError.
  #
  # The failure is reported after the begin/rescue so the assertion's own
  # exception can never be swallowed by the rescue clause.
  def assert_scrape_raises_runtime_error(fixture_path)
    raised = false
    begin
      open_local_file_and_scrape(fixture_path)
    rescue RuntimeError
      raised = true
    end
    assert_fail("expected RuntimeError was not raised for #{fixture_path}") unless raised
  end
end
|
74
|
+
|
75
|
+
# Test subclass of Youtube::SearchResultScraper that lets a test choose where
# the HTML comes from: the location is passed to the constructor, and
# +open_local_file+ loads it as a saved HTML file instead of fetching a search
# page.
class MySearchResultScraper < Youtube::SearchResultScraper
  # NOTE(review): Ruby class variables are shared across the inheritance
  # tree, so if the parent class also defines @@youtube_search_base_url this
  # assignment (and the one in +initialize+) overwrites the parent's value for
  # every scraper instance -- presumably intentional for the tests; confirm.
  @@youtube_search_base_url = "http://www.youtube.com/results?search_query="

  # url::     location to read from; stored in @@youtube_search_base_url.
  # keyword:: search keyword, stored in @keyword (may be nil).
  # page::    page number; @page is only assigned when a value is given.
  #
  # NOTE(review): +super+ is not called, so any setup done by the parent's
  # constructor is skipped -- verify that this is intended.
  def initialize(url, keyword = nil, page = nil)
    @@youtube_search_base_url = url
    @keyword = keyword
    @page = page unless page.nil?
  end

  # Reads the file named by @@youtube_search_base_url into @html, runs
  # +replace_document_write_javascript+ (not defined in this class;
  # presumably inherited), then parses @html with Hpricot into
  # @search_result.
  #
  # Returns the parsed document.
  def open_local_file
    # Kernel.open is spelled out because a bare +open+ would be dispatched to
    # this object first, and tests call a no-argument scraper.open on it.
    # The block form closes the handle as soon as the content has been read.
    @html = Kernel.open(@@youtube_search_base_url) { |io| io.read }
    replace_document_write_javascript
    @search_result = Hpricot.parse(@html)
  end
end
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: youtubesearchresultscraper
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.0.2
|
7
|
-
date: 2006-12-
|
6
|
+
version: 0.0.3
|
7
|
+
date: 2006-12-22 00:00:00 +09:00
|
8
8
|
summary: This gem provide function to scrape html of search result on youtube
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -32,6 +32,11 @@ files:
|
|
32
32
|
- lib/youtube
|
33
33
|
- lib/youtube/video.rb
|
34
34
|
- lib/youtube/searchresultscraper.rb
|
35
|
+
- test/youtube_scraper_test.rb
|
36
|
+
- test/html
|
37
|
+
- test/html/scraping_error.html
|
38
|
+
- test/html/dataY_noMsgY.htm
|
39
|
+
- test/html/dataN_noMsgN.htm
|
35
40
|
- CHANGELOG
|
36
41
|
- MIT-LICENSE
|
37
42
|
- README
|