youtubescraper 0.0.8 → 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +4 -0
- data/lib/youtube/searchresultscraper.rb +27 -1
- data/lib/youtube/video.rb +4 -3
- metadata +2 -3
- data/test/youtube_scraper_test.rb~ +0 -92
data/CHANGELOG
CHANGED
@@ -61,10 +61,16 @@ module Youtube #:nodoc:
|
|
61
61
|
|
62
62
|
attr_accessor :keyword
|
63
63
|
attr_accessor :page
|
64
|
+
attr_accessor :sort
|
64
65
|
attr_reader :video_count
|
65
66
|
attr_reader :video_from
|
66
67
|
attr_reader :video_to
|
67
68
|
|
69
|
+
Relevance = 'relevance'
|
70
|
+
DateAdded = 'video_date_uploaded'
|
71
|
+
ViewCount = 'video_view_count'
|
72
|
+
Rating = 'video_avg_rating'
|
73
|
+
|
68
74
|
@@youtube_search_base_url = "http://www.youtube.com/results?search_query="
|
69
75
|
|
70
76
|
# Create Youtube::SearchResultScraper object specifying keyword and number of page.
|
@@ -75,16 +81,19 @@ module Youtube #:nodoc:
|
|
75
81
|
# * keyword - specify keyword that you want to search on YouTube.
|
76
82
|
# You must specify keyword encoded by UTF-8.
|
77
83
|
# * page - specify number of page
|
84
|
+
# * sort - specify sort rule
|
78
85
|
|
79
|
-
def initialize keyword, page=nil
|
86
|
+
def initialize keyword, page=nil, sort=nil
|
80
87
|
@keyword = keyword
|
81
88
|
@page = page if not page == nil
|
89
|
+
@sort = sort if not sort == nil
|
82
90
|
end
|
83
91
|
|
84
92
|
# Get search result from youtube by specified keyword.
|
85
93
|
def open
|
86
94
|
@url = @@youtube_search_base_url + CGI.escape(@keyword)
|
87
95
|
@url += "&page=#{@page}" if not @page == nil
|
96
|
+
@url += "&search_sort=#{@sort}" if not @sort == nil
|
88
97
|
@html = Kernel.open(@url).read
|
89
98
|
replace_document_write_javascript
|
90
99
|
@search_result = Hpricot.parse(@html)
|
@@ -106,6 +115,7 @@ module Youtube #:nodoc:
|
|
106
115
|
video.view_count = scrape_view_count(video_html)
|
107
116
|
video.thumbnail_url = scrape_thumbnail_url(video_html)
|
108
117
|
video.tags = scrape_tags(video_html)
|
118
|
+
video.upload_time = scrape_upload_time(video_html)
|
109
119
|
video.url = scrape_url(video_html)
|
110
120
|
|
111
121
|
check_video video
|
@@ -189,6 +199,22 @@ module Youtube #:nodoc:
|
|
189
199
|
tags.join(" ")
|
190
200
|
end
|
191
201
|
|
202
|
+
def scrape_upload_time video_html
|
203
|
+
if video_html.search("div[@class='vfacets']").inner_html =~ /.*Added:<\/span>\s*(\d+)\s*(hour|day|week|month|year).*/m
|
204
|
+
if $2 == "hour"
|
205
|
+
Time.now - $1.to_i * 60 * 60
|
206
|
+
elsif $2 == "day"
|
207
|
+
Time.now - $1.to_i * 60 * 60 * 24
|
208
|
+
elsif $2 == "week"
|
209
|
+
Time.now - $1.to_i * 60 * 60 * 24 * 7
|
210
|
+
elsif $2 == "month"
|
211
|
+
Time.now - $1.to_i * 60 * 60 * 24 * 30
|
212
|
+
elsif $2 == "year"
|
213
|
+
Time.now - $1.to_i * 60 * 60 * 24 * 30 * 12
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
192
218
|
def scrape_thumbnail_url video_html
|
193
219
|
video_html.search("img[@class='vimg120']").to_html.sub(/.*src="(.*?)".*/, '\1')
|
194
220
|
end
|
data/lib/youtube/video.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (C) 2006 by in3c.org
|
3
|
-
# http://in3c.org/
|
2
|
+
# Copyright (C) 2006 by in3c.org
|
3
|
+
# http://in3c.org/
|
4
4
|
#
|
5
5
|
# Permission is hereby granted, free of charge, to any person obtaining
|
6
6
|
# a copy of this software and associated documentation files (the
|
@@ -36,13 +36,14 @@ module Youtube
|
|
36
36
|
attr_accessor :upload_time
|
37
37
|
attr_accessor :comment_count
|
38
38
|
attr_accessor :tags
|
39
|
+
attr_accessor :upload_time
|
39
40
|
attr_accessor :url
|
40
41
|
attr_accessor :thumbnail_url
|
41
42
|
|
42
43
|
# Return self information as XML format.
|
43
44
|
def to_xml
|
44
45
|
xml = "<video>\n"
|
45
|
-
instance_variables.each do |attr|
|
46
|
+
instance_variables.each do |attr|
|
46
47
|
value = instance_variable_get(attr).to_s
|
47
48
|
value.gsub!(/<br \/>/, "\n")
|
48
49
|
value.gsub!(/<.*?>/m, '')
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: youtubescraper
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.0.8
|
7
|
-
date: 2007-03-
|
6
|
+
version: 0.0.9
|
7
|
+
date: 2007-03-30 00:00:00 +09:00
|
8
8
|
summary: This gem provide function to scrape html of search result on youtube
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -35,7 +35,6 @@ files:
|
|
35
35
|
- lib/youtube/searchresultscraper.rb
|
36
36
|
- test/youtube_scraper_test.rb
|
37
37
|
- test/html
|
38
|
-
- test/youtube_scraper_test.rb~
|
39
38
|
- test/html/scraping_error.html
|
40
39
|
- test/html/dataY_noMsgY.htm
|
41
40
|
- test/html/dataN_noMsgN.htm
|
@@ -1,92 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
|
3
|
-
#require "runit/testcase"
|
4
|
-
#require "runit/cui/testrunner"
|
5
|
-
require 'test/unit'
|
6
|
-
|
7
|
-
|
8
|
-
require "rubygems"
|
9
|
-
require 'hpricot'
|
10
|
-
require "youtube/searchresultscraper"
|
11
|
-
|
12
|
-
class SearchResultScraperTest < Test::Unit::TestCase
|
13
|
-
#class SearchResultScraperTest < RUNIT::TestCase
|
14
|
-
|
15
|
-
def test_scrape
|
16
|
-
|
17
|
-
#
|
18
|
-
#正常系
|
19
|
-
#
|
20
|
-
#検索結果がある
|
21
|
-
open_and_scrape("http://www.youtube.com/results?search_query=", "doraemon", 2)
|
22
|
-
#検索結果がない
|
23
|
-
open_and_scrape("http://www.youtube.com/results?search_query=", "aeudyr jahafudfhadf ahf", 2)
|
24
|
-
|
25
|
-
#
|
26
|
-
#異常系
|
27
|
-
#
|
28
|
-
#������̤����뤬��Not Found��������
|
29
|
-
begin
|
30
|
-
open_local_file_and_scrape("html/dataY_noMsgY.htm")
|
31
|
-
assert_fail("������٤����顼��ȯ�����Ƥ��ʤ�")
|
32
|
-
rescue RuntimeError =>e
|
33
|
-
#puts e
|
34
|
-
end
|
35
|
-
#������̤��ʤ���Not Found��ʤ����
|
36
|
-
begin
|
37
|
-
open_local_file_and_scrape("html/dataN_noMsgN.htm")
|
38
|
-
assert_fail("������٤����顼��ȯ�����Ƥ��ʤ�")
|
39
|
-
rescue RuntimeError
|
40
|
-
#puts e
|
41
|
-
end
|
42
|
-
|
43
|
-
#�����ι��ܤ��������Ǥ��Ƥ��ʤ����
|
44
|
-
begin
|
45
|
-
open_local_file_and_scrape("html/scraping_error.html")
|
46
|
-
assert_fail("������٤����顼��ȯ�����Ƥ��ʤ�")
|
47
|
-
rescue RuntimeError => e
|
48
|
-
# puts e
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
def test_scrape_video_count
|
53
|
-
|
54
|
-
scraper = open_and_scrape("http://www.youtube.com/results?search_query=", "doraemon", 2)
|
55
|
-
puts scraper.video_count
|
56
|
-
assert( scraper.video_count > 0 )
|
57
|
-
|
58
|
-
scraper = open_and_scrape("http://www.youtube.com/results?search_query=", "doraemonifdadfa", 2)
|
59
|
-
puts scraper.video_count
|
60
|
-
assert( scraper.video_count == 0 )
|
61
|
-
end
|
62
|
-
|
63
|
-
def open_and_scrape url, keyword=nil, page=nil
|
64
|
-
scraper = MySearchResultScraper.new(url, keyword, page)
|
65
|
-
scraper.open
|
66
|
-
scraper.scrape
|
67
|
-
scraper
|
68
|
-
end
|
69
|
-
|
70
|
-
def open_local_file_and_scrape url
|
71
|
-
scraper = MySearchResultScraper.new(url)
|
72
|
-
scraper.open_local_file
|
73
|
-
scraper.scrape
|
74
|
-
end
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
class MySearchResultScraper < Youtube::SearchResultScraper
|
79
|
-
@@youtube_search_base_url = "http://www.youtube.com/results?search_query="
|
80
|
-
|
81
|
-
def initialize url, keyword=nil, page=nil
|
82
|
-
@@youtube_search_base_url = url
|
83
|
-
@keyword = keyword
|
84
|
-
@page = page if not page == nil
|
85
|
-
end
|
86
|
-
|
87
|
-
def open_local_file
|
88
|
-
@html = Kernel.open(@@youtube_search_base_url).read
|
89
|
-
replace_document_write_javascript
|
90
|
-
@search_result = Hpricot.parse(@html)
|
91
|
-
end
|
92
|
-
end
|