youtubescraper 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +20 -0
- data/MIT-LICENSE +20 -0
- data/README +23 -0
- data/lib/youtube/browsescraper.rb +260 -0
- data/lib/youtube/searchresultscraper.rb +263 -0
- data/lib/youtube/searchresultscraper.rb~ +263 -0
- data/lib/youtube/video.rb +62 -0
- data/test/html/dataN_noMsgN.htm +387 -0
- data/test/html/dataY_noMsgY.htm +1507 -0
- data/test/html/scraping_error.html +1503 -0
- data/test/youtube_scraper_test.rb +89 -0
- data/test/youtube_scraper_test.rb~ +92 -0
- metadata +69 -0
@@ -0,0 +1,89 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'test/unit'
|
4
|
+
|
5
|
+
|
6
|
+
require "rubygems"
|
7
|
+
require 'hpricot'
|
8
|
+
require "youtube/searchresultscraper"
|
9
|
+
|
10
|
+
# Tests for Youtube::SearchResultScraper, driven through the
# MySearchResultScraper subclass so that the page source can be either a live
# YouTube search URL or a local HTML fixture.
#
# NOTE: the open_and_scrape calls hit the network (http://www.youtube.com).
class SearchResultScraperTest < Test::Unit::TestCase

  # Normal inputs must scrape without raising; broken fixtures must make
  # the scraper raise RuntimeError.
  def test_scrape
    #
    # Normal cases (live requests)
    #
    # Query for which test_scrape_video_count expects at least one hit.
    open_and_scrape("http://www.youtube.com/results?search_query=", "doraemon", 2)
    # Nonsense query; scraping must still complete without raising.
    open_and_scrape("http://www.youtube.com/results?search_query=", "aeudyr jahafudfhadf ahf", 2)

    #
    # Error cases (local fixtures)
    #
    # NOTE(review): the fixture names suggest "data present / not-found
    # message present" and "data absent / not-found message absent" --
    # confirm against the fixture contents.
    assert_scrape_raises("html/dataY_noMsgY.htm")
    assert_scrape_raises("html/dataN_noMsgN.htm")

    # NOTE(review): presumably a page whose items cannot be scraped -- confirm.
    assert_scrape_raises("html/scraping_error.html")
  end

  # video_count must be positive for a query with hits and zero for a
  # nonsense query.
  def test_scrape_video_count
    scraper = open_and_scrape("http://www.youtube.com/results?search_query=", "doraemon", 2)
    puts scraper.video_count
    assert(scraper.video_count > 0)

    scraper = open_and_scrape("http://www.youtube.com/results?search_query=", "doraemonifdadfa", 2)
    puts scraper.video_count
    assert_equal(0, scraper.video_count)
  end

  # Builds a scraper for +url+ / +keyword+ / +page+, opens it, scrapes it and
  # returns the scraper so callers can inspect the result.
  def open_and_scrape(url, keyword = nil, page = nil)
    scraper = MySearchResultScraper.new(url, keyword, page)
    scraper.open
    scraper.scrape
    scraper
  end

  # Scrapes the local HTML file +url+ and returns whatever #scrape returns.
  # A relative path is resolved against this test file's directory rather
  # than the current working directory, so the suite can be started from
  # anywhere; an absolute path is used unchanged.
  def open_local_file_and_scrape(url)
    scraper = MySearchResultScraper.new(File.expand_path(url, File.dirname(__FILE__)))
    scraper.open_local_file
    scraper.scrape
  end

  private

  # Asserts that scraping the fixture at +path+ raises RuntimeError.
  # Replaces the former begin / assert_fail / rescue pattern: assert_fail is
  # a RUNIT assertion that Test::Unit::TestCase does not define, so a missing
  # exception showed up as a NoMethodError error instead of a test failure.
  def assert_scrape_raises(path)
    assert_raise(RuntimeError, "expected scraping #{path} to raise RuntimeError") do
      open_local_file_and_scrape(path)
    end
  end

end
|
74
|
+
|
75
|
+
# Test subclass of Youtube::SearchResultScraper that lets a test choose the
# base URL (or a local file path) the scraper reads from.
class MySearchResultScraper < Youtube::SearchResultScraper
  # NOTE(review): a @@class variable is shared with the superclass when the
  # superclass defines one of the same name, so assigning it here presumably
  # overrides the base URL the parent's #open uses -- confirm against
  # Youtube::SearchResultScraper.
  @@youtube_search_base_url = "http://www.youtube.com/results?search_query="

  # url::     search base URL, or a local file path when #open_local_file
  #           is going to be used.
  # keyword:: search keyword (optional).
  # page::    result page number; @page is left untouched when nil.
  #
  # Does not call super, so the parent initializer is bypassed.
  def initialize(url, keyword = nil, page = nil)
    @@youtube_search_base_url = url
    @keyword = keyword
    @page = page unless page.nil?
  end

  # Loads the HTML from the local file whose path was passed to #initialize
  # as +url+, then prepares it for #scrape.
  #
  # File.read closes the file handle before returning (the former
  # Kernel.open(...).read left it open) and, unlike Kernel.open, never
  # treats a path starting with "|" as a command to run.
  def open_local_file
    @html = File.read(@@youtube_search_base_url)
    # Inherited helper; presumably rewrites document.write() script output
    # inside @html so it can be parsed -- verify in the parent class.
    replace_document_write_javascript
    @search_result = Hpricot.parse(@html)
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
#require "runit/testcase"
|
4
|
+
#require "runit/cui/testrunner"
|
5
|
+
require 'test/unit'
|
6
|
+
|
7
|
+
|
8
|
+
require "rubygems"
|
9
|
+
require 'hpricot'
|
10
|
+
require "youtube/searchresultscraper"
|
11
|
+
|
12
|
+
# Tests for Youtube::SearchResultScraper, driven through the
# MySearchResultScraper subclass so that the page source can be either a live
# YouTube search URL or a local HTML fixture.
#
# This suite was ported from RUNIT::TestCase to Test::Unit::TestCase.
# NOTE: the open_and_scrape calls hit the network (http://www.youtube.com).
class SearchResultScraperTest < Test::Unit::TestCase

  # Normal inputs must scrape without raising; broken fixtures must make
  # the scraper raise RuntimeError.
  def test_scrape
    #
    # Normal cases (live requests)
    #
    # Query for which test_scrape_video_count expects at least one hit.
    open_and_scrape("http://www.youtube.com/results?search_query=", "doraemon", 2)
    # Nonsense query; scraping must still complete without raising.
    open_and_scrape("http://www.youtube.com/results?search_query=", "aeudyr jahafudfhadf ahf", 2)

    #
    # Error cases (local fixtures)
    #
    # NOTE(review): the fixture names suggest "data present / not-found
    # message present" and "data absent / not-found message absent" --
    # confirm against the fixture contents.
    assert_scrape_raises("html/dataY_noMsgY.htm")
    assert_scrape_raises("html/dataN_noMsgN.htm")

    # NOTE(review): presumably a page whose items cannot be scraped -- confirm.
    assert_scrape_raises("html/scraping_error.html")
  end

  # video_count must be positive for a query with hits and zero for a
  # nonsense query.
  def test_scrape_video_count
    scraper = open_and_scrape("http://www.youtube.com/results?search_query=", "doraemon", 2)
    puts scraper.video_count
    assert(scraper.video_count > 0)

    scraper = open_and_scrape("http://www.youtube.com/results?search_query=", "doraemonifdadfa", 2)
    puts scraper.video_count
    assert_equal(0, scraper.video_count)
  end

  # Builds a scraper for +url+ / +keyword+ / +page+, opens it, scrapes it and
  # returns the scraper so callers can inspect the result.
  def open_and_scrape(url, keyword = nil, page = nil)
    scraper = MySearchResultScraper.new(url, keyword, page)
    scraper.open
    scraper.scrape
    scraper
  end

  # Scrapes the local HTML file +url+ and returns whatever #scrape returns.
  # A relative path is resolved against this test file's directory rather
  # than the current working directory, so the suite can be started from
  # anywhere; an absolute path is used unchanged.
  def open_local_file_and_scrape(url)
    scraper = MySearchResultScraper.new(File.expand_path(url, File.dirname(__FILE__)))
    scraper.open_local_file
    scraper.scrape
  end

  private

  # Asserts that scraping the fixture at +path+ raises RuntimeError.
  # Replaces the former begin / assert_fail / rescue pattern: assert_fail is
  # a RUNIT assertion that Test::Unit::TestCase does not define, so a missing
  # exception showed up as a NoMethodError error instead of a test failure.
  def assert_scrape_raises(path)
    assert_raise(RuntimeError, "expected scraping #{path} to raise RuntimeError") do
      open_local_file_and_scrape(path)
    end
  end

end
|
77
|
+
|
78
|
+
# Test subclass of Youtube::SearchResultScraper that lets a test choose the
# base URL (or a local file path) the scraper reads from.
class MySearchResultScraper < Youtube::SearchResultScraper
  # NOTE(review): a @@class variable is shared with the superclass when the
  # superclass defines one of the same name, so assigning it here presumably
  # overrides the base URL the parent's #open uses -- confirm against
  # Youtube::SearchResultScraper.
  @@youtube_search_base_url = "http://www.youtube.com/results?search_query="

  # url::     search base URL, or a local file path when #open_local_file
  #           is going to be used.
  # keyword:: search keyword (optional).
  # page::    result page number; @page is left untouched when nil.
  #
  # Does not call super, so the parent initializer is bypassed.
  def initialize(url, keyword = nil, page = nil)
    @@youtube_search_base_url = url
    @keyword = keyword
    @page = page unless page.nil?
  end

  # Loads the HTML from the local file whose path was passed to #initialize
  # as +url+, then prepares it for #scrape.
  #
  # File.read closes the file handle before returning (the former
  # Kernel.open(...).read left it open) and, unlike Kernel.open, never
  # treats a path starting with "|" as a command to run.
  def open_local_file
    @html = File.read(@@youtube_search_base_url)
    # Inherited helper; presumably rewrites document.write() script output
    # inside @html so it can be parsed -- verify in the parent class.
    replace_document_write_javascript
    @search_result = Hpricot.parse(@html)
  end
end
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.0
|
3
|
+
specification_version: 1
|
4
|
+
name: youtubescraper
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.0.7
|
7
|
+
date: 2007-03-29 00:00:00 +09:00
|
8
|
+
summary: This gem provides functions to scrape the HTML of YouTube search results
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email:
|
12
|
+
homepage:
|
13
|
+
rubyforge_project:
|
14
|
+
description:
|
15
|
+
autorequire: youtube/searchresultscraper
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.8.1
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Yuki SHIDA
|
31
|
+
files:
|
32
|
+
- lib/youtube
|
33
|
+
- lib/youtube/browsescraper.rb
|
34
|
+
- lib/youtube/searchresultscraper.rb~
|
35
|
+
- lib/youtube/video.rb
|
36
|
+
- lib/youtube/searchresultscraper.rb
|
37
|
+
- test/youtube_scraper_test.rb
|
38
|
+
- test/html
|
39
|
+
- test/youtube_scraper_test.rb~
|
40
|
+
- test/html/scraping_error.html
|
41
|
+
- test/html/dataY_noMsgY.htm
|
42
|
+
- test/html/dataN_noMsgN.htm
|
43
|
+
- CHANGELOG
|
44
|
+
- MIT-LICENSE
|
45
|
+
- README
|
46
|
+
test_files: []
|
47
|
+
|
48
|
+
rdoc_options: []
|
49
|
+
|
50
|
+
extra_rdoc_files:
|
51
|
+
- CHANGELOG
|
52
|
+
- MIT-LICENSE
|
53
|
+
- README
|
54
|
+
executables: []
|
55
|
+
|
56
|
+
extensions: []
|
57
|
+
|
58
|
+
requirements:
|
59
|
+
- hpricot rubygem
|
60
|
+
dependencies:
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
name: hpricot
|
63
|
+
version_requirement:
|
64
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: "0.4"
|
69
|
+
version:
|