youtubesearchresultscraper 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +6 -2
- data/README +20 -10
- data/lib/youtube/searchresultscraper.rb +53 -2
- data/lib/youtube/video.rb +3 -0
- metadata +3 -3
data/CHANGELOG
CHANGED
data/README
CHANGED
@@ -1,13 +1,23 @@
|
|
1
|
-
|
1
|
+
Introduction
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
Youtube::SearchResultScraper scrapes video information from the search result pages on www.youtube.com.
|
4
|
+
You can get the results as an array or as XML.
|
5
|
+
The XML format is the same as that of the YouTube Developer API (www.youtube.com/dev_api_ref?m=youtube.videos.list_by_tag).
|
6
6
|
|
7
|
-
|
8
|
-
scraper.open
|
9
|
-
scraper.scrape
|
10
|
-
puts scraper.get_xml
|
11
|
-
8< - - - - 8< - - - - 8< - - - - 8< - - - -
|
7
|
+
Example
|
12
8
|
|
13
|
-
|
9
|
+
require "rubygems"
|
10
|
+
require "youtube/searchresultscraper"
|
11
|
+
|
12
|
+
scraper = Youtube::SearchResultScraper.new(keyword, page)
|
13
|
+
scraper.open
|
14
|
+
scraper.scrape
|
15
|
+
puts scraper.get_xml
|
16
|
+
|
17
|
+
More Information
|
18
|
+
|
19
|
+
http://www.ark-web.jp/sandbox/wiki/184.html (Japanese only)
|
20
|
+
|
21
|
+
Author: Yuki SHIDA, shida@in3c.org
|
22
|
+
Version: 0.0.2
|
23
|
+
License: MIT license
|
@@ -1,3 +1,4 @@
|
|
1
|
+
#--
|
1
2
|
# Copyright (C) 2006 by in3c.org
|
2
3
|
#
|
3
4
|
# Permission is hereby granted, free of charge, to any person obtaining
|
@@ -18,6 +19,9 @@
|
|
18
19
|
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
20
|
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
21
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
22
|
+
#++
|
23
|
+
# :main:Youtube::SearchResultScraper
|
24
|
+
# :title:Youtube::SearchResultScraper RDoc Documentation
|
21
25
|
|
22
26
|
require 'open-uri'
|
23
27
|
require 'cgi'
|
@@ -25,7 +29,33 @@ require 'rubygems'
|
|
25
29
|
require 'hpricot'
|
26
30
|
require 'youtube/video'
|
27
31
|
|
28
|
-
module Youtube
|
32
|
+
module Youtube #:nodoc:
|
33
|
+
|
34
|
+
# = Introduction
|
35
|
+
# Youtube::SearchResultScraper scrapes video information from search result page
|
36
|
+
# on http://www.youtube.com.
|
37
|
+
#
|
38
|
+
# You can get result as array or xml.
|
39
|
+
#
|
40
|
+
# XML format is same as YouTube Developer API
|
41
|
+
# (http://www.youtube.com/dev_api_ref?m=youtube.videos.list_by_tag).
|
42
|
+
#
|
43
|
+
# = Example
|
44
|
+
# require "rubygems"
|
45
|
+
# require "youtube/searchresultscraper"
|
46
|
+
#
|
47
|
+
# scraper = Youtube::SearchResultScraper.new(keyword, page)
|
48
|
+
# scraper.open
|
49
|
+
# scraper.scrape
|
50
|
+
# puts scraper.get_xml
|
51
|
+
#
|
52
|
+
# = More Information
|
53
|
+
# http://www.ark-web.jp/sandbox/wiki/184.html (japanese only)
|
54
|
+
#
|
55
|
+
# Author:: Yuki SHIDA <shida@in3c.org>
|
56
|
+
# Version:: 0.0.2
|
57
|
+
# License:: MIT license
|
58
|
+
|
29
59
|
class SearchResultScraper
|
30
60
|
|
31
61
|
attr_accessor :keyword
|
@@ -33,11 +63,21 @@ module Youtube
|
|
33
63
|
|
34
64
|
@@youtube_search_base_url = "http://www.youtube.com/results?search_query="
|
35
65
|
|
66
|
+
# Creates a Youtube::SearchResultScraper object for the given keyword and page number.
|
67
|
+
#
|
68
|
+
# You cannot specify number of videos per page.
|
69
|
+
# The number of videos is always 20 per page.
|
70
|
+
#
|
71
|
+
# * keyword - specify keyword that you want to search on YouTube.
|
72
|
+
# The keyword must be encoded in UTF-8.
|
73
|
+
# * page - the page number of the search results to fetch.
|
74
|
+
|
36
75
|
def initialize keyword, page=nil
|
37
76
|
@keyword = keyword
|
38
77
|
@page = page if not page == nil
|
39
78
|
end
|
40
79
|
|
80
|
+
# Get search result from youtube by specified keyword.
|
41
81
|
def open
|
42
82
|
url = @@youtube_search_base_url + CGI.escape(@keyword)
|
43
83
|
url += "&page=#{@page}" if not @page == nil
|
@@ -46,6 +86,7 @@ module Youtube
|
|
46
86
|
@search_result = Hpricot.parse(@html)
|
47
87
|
end
|
48
88
|
|
89
|
+
# Scrape video information from search result html.
|
49
90
|
def scrape
|
50
91
|
@videos = []
|
51
92
|
|
@@ -64,11 +105,21 @@ module Youtube
|
|
64
105
|
video.url = scrape_url(video_html)
|
65
106
|
@videos << video
|
66
107
|
end
|
108
|
+
|
109
|
+
@videos
|
110
|
+
end
|
111
|
+
|
112
|
+
# Iterator for scraped videos.
|
113
|
+
def each
|
114
|
+
@videos.each do |video|
|
115
|
+
yield video
|
116
|
+
end
|
67
117
|
end
|
68
118
|
|
119
|
+
# Return videos information as XML Format.
|
69
120
|
def get_xml
|
70
121
|
xml = "<ut_response status=\"ok\"><video_list>\n"
|
71
|
-
|
122
|
+
each do |video|
|
72
123
|
xml += video.to_xml
|
73
124
|
end
|
74
125
|
xml += "</video_list></ut_response>"
|
data/lib/youtube/video.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
#--
|
1
2
|
# Copyright (C) 2006 by in3c.org
|
2
3
|
# http://in3c.org/
|
3
4
|
#
|
@@ -19,6 +20,7 @@
|
|
19
20
|
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
20
21
|
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
21
22
|
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
#++
|
22
24
|
|
23
25
|
module Youtube
|
24
26
|
|
@@ -37,6 +39,7 @@ module Youtube
|
|
37
39
|
attr_accessor :url
|
38
40
|
attr_accessor :thumbnail_url
|
39
41
|
|
42
|
+
# Return self information as XML format.
|
40
43
|
def to_xml
|
41
44
|
xml = "<video>\n"
|
42
45
|
instance_variables.each do |attr|
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: youtubesearchresultscraper
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.0.1
|
7
|
-
date: 2006-
|
6
|
+
version: 0.0.2
|
7
|
+
date: 2006-12-03 00:00:00 +09:00
|
8
8
|
summary: This gem provide function to scrape html of search result on youtube
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -15,7 +15,7 @@ description:
|
|
15
15
|
autorequire: youtube/searchresultscraper
|
16
16
|
default_executable:
|
17
17
|
bindir: bin
|
18
|
-
has_rdoc:
|
18
|
+
has_rdoc: true
|
19
19
|
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
20
|
requirements:
|
21
21
|
- - ">="
|