youtubesearchresultscraper 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +6 -2
- data/README +20 -10
- data/lib/youtube/searchresultscraper.rb +53 -2
- data/lib/youtube/video.rb +3 -0
- metadata +3 -3
data/CHANGELOG
CHANGED
data/README
CHANGED
@@ -1,13 +1,23 @@
|
|
1
|
-
|
1
|
+
Introduction
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
Youtube::SearchResultScraper scrapes video information from the search result pages on www.youtube.com.
|
4
|
+
You can get the results as an array or as XML.
|
5
|
+
The XML format is the same as that of the YouTube Developer API (www.youtube.com/dev_api_ref?m=youtube.videos.list_by_tag).
|
6
6
|
|
7
|
-
|
8
|
-
scraper.open
|
9
|
-
scraper.scrape
|
10
|
-
puts scraper.get_xml
|
11
|
-
8< - - - - 8< - - - - 8< - - - - 8< - - - -
|
7
|
+
Example
|
12
8
|
|
13
|
-
|
9
|
+
require "rubygems"
|
10
|
+
require "youtube/searchresultscraper"
|
11
|
+
|
12
|
+
scraper = Youtube::SearchResultScraper.new(keyword, page)
|
13
|
+
scraper.open
|
14
|
+
scraper.scrape
|
15
|
+
puts scraper.get_xml
|
16
|
+
|
17
|
+
More Information
|
18
|
+
|
19
|
+
http://www.ark-web.jp/sandbox/wiki/184.html (japanese only)
|
20
|
+
|
21
|
+
Author: Yuki SHIDA, shida@in3c.org
|
22
|
+
Version: 0.0.2
|
23
|
+
License: MIT license
|
@@ -1,3 +1,4 @@
|
|
1
|
+
#--
|
1
2
|
# Copyright (C) 2006 by in3c.org
|
2
3
|
#
|
3
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
@@ -18,6 +19,9 @@
|
|
18
19
|
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
20
|
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
21
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
# :main:Youtube::SearchResultScraper
|
24
|
+
# :title:Youtube::SearchResultScraper RDoc Documentation
|
21
25
|
|
22
26
|
require 'open-uri'
|
23
27
|
require 'cgi'
|
@@ -25,7 +29,33 @@ require 'rubygems'
|
|
25
29
|
require 'hpricot'
|
26
30
|
require 'youtube/video'
|
27
31
|
|
28
|
-
module Youtube
|
32
|
+
module Youtube #:nodoc:
|
33
|
+
|
34
|
+
# = Introduction
|
35
|
+
# Youtube::SearchResultScraper scrapes video information from the search result pages
|
36
|
+
# on http://www.youtube.com.
|
37
|
+
#
|
38
|
+
# You can get the results as an array or as XML.
|
39
|
+
#
|
40
|
+
# The XML format is the same as that of the YouTube Developer API
|
41
|
+
# (http://www.youtube.com/dev_api_ref?m=youtube.videos.list_by_tag).
|
42
|
+
#
|
43
|
+
# = Example
|
44
|
+
# require "rubygems"
|
45
|
+
# require "youtube/searchresultscraper"
|
46
|
+
#
|
47
|
+
# scraper = Youtube::SearchResultScraper.new(keyword, page)
|
48
|
+
# scraper.open
|
49
|
+
# scraper.scrape
|
50
|
+
# puts scraper.get_xml
|
51
|
+
#
|
52
|
+
# = More Information
|
53
|
+
# http://www.ark-web.jp/sandbox/wiki/184.html (japanese only)
|
54
|
+
#
|
55
|
+
# Author:: Yuki SHIDA <shida@in3c.org>
|
56
|
+
# Version:: 0.0.2
|
57
|
+
# License:: MIT license
|
58
|
+
|
29
59
|
class SearchResultScraper
|
30
60
|
|
31
61
|
attr_accessor :keyword
|
@@ -33,11 +63,21 @@ module Youtube
|
|
33
63
|
|
34
64
|
@@youtube_search_base_url = "http://www.youtube.com/results?search_query="
|
35
65
|
|
66
|
+
# Create a Youtube::SearchResultScraper object for the given keyword and page number.
|
67
|
+
#
|
68
|
+
# You cannot specify the number of videos per page.
|
69
|
+
# The number of videos is always 20 per page.
|
70
|
+
#
|
71
|
+
# * keyword - the keyword to search for on YouTube.
|
72
|
+
# The keyword must be encoded in UTF-8.
|
73
|
+
# * page - the page number of the search results to fetch
|
74
|
+
|
36
75
|
def initialize keyword, page=nil
|
37
76
|
@keyword = keyword
|
38
77
|
@page = page if not page == nil
|
39
78
|
end
|
40
79
|
|
80
|
+
# Get the search results from YouTube for the specified keyword.
|
41
81
|
def open
|
42
82
|
url = @@youtube_search_base_url + CGI.escape(@keyword)
|
43
83
|
url += "&page=#{@page}" if not @page == nil
|
@@ -46,6 +86,7 @@ module Youtube
|
|
46
86
|
@search_result = Hpricot.parse(@html)
|
47
87
|
end
|
48
88
|
|
89
|
+
# Scrape video information from the search result HTML.
|
49
90
|
def scrape
|
50
91
|
@videos = []
|
51
92
|
|
@@ -64,11 +105,21 @@ module Youtube
|
|
64
105
|
video.url = scrape_url(video_html)
|
65
106
|
@videos << video
|
66
107
|
end
|
108
|
+
|
109
|
+
@videos
|
110
|
+
end
|
111
|
+
|
112
|
+
# Iterator for scraped videos.
|
113
|
+
def each
|
114
|
+
@videos.each do |video|
|
115
|
+
yield video
|
116
|
+
end
|
67
117
|
end
|
68
118
|
|
119
|
+
# Return the video information in XML format.
|
69
120
|
def get_xml
|
70
121
|
xml = "<ut_response status=\"ok\"><video_list>\n"
|
71
|
-
|
122
|
+
each do |video|
|
72
123
|
xml += video.to_xml
|
73
124
|
end
|
74
125
|
xml += "</video_list></ut_response>"
|
data/lib/youtube/video.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
#--
|
1
2
|
# Copyright (C) 2006 by in3c.org
|
2
3
|
# http://in3c.org/
|
3
4
|
#
|
@@ -19,6 +20,7 @@
|
|
19
20
|
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
21
|
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
22
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#++
|
22
24
|
|
23
25
|
module Youtube
|
24
26
|
|
@@ -37,6 +39,7 @@ module Youtube
|
|
37
39
|
attr_accessor :url
|
38
40
|
attr_accessor :thumbnail_url
|
39
41
|
|
42
|
+
# Return this video's information in XML format.
|
40
43
|
def to_xml
|
41
44
|
xml = "<video>\n"
|
42
45
|
instance_variables.each do |attr|
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: youtubesearchresultscraper
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.0.1
|
7
|
-
date: 2006-
|
6
|
+
version: 0.0.2
|
7
|
+
date: 2006-12-03 00:00:00 +09:00
|
8
8
|
summary: This gem provide function to scrape html of search result on youtube
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -15,7 +15,7 @@ description:
|
|
15
15
|
autorequire: youtube/searchresultscraper
|
16
16
|
default_executable:
|
17
17
|
bindir: bin
|
18
|
-
has_rdoc:
|
18
|
+
has_rdoc: true
|
19
19
|
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
20
|
requirements:
|
21
21
|
- - ">="
|