youtubescraper 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +4 -0
- data/lib/youtube/searchresultscraper.rb +27 -1
- data/lib/youtube/video.rb +4 -3
- metadata +2 -3
- data/test/youtube_scraper_test.rb~ +0 -92
data/CHANGELOG
CHANGED
data/lib/youtube/searchresultscraper.rb
CHANGED
@@ -61,10 +61,16 @@ module Youtube #:nodoc:
|
|
61
61
|
|
62
62
|
attr_accessor :keyword
|
63
63
|
attr_accessor :page
|
64
|
+
attr_accessor :sort
|
64
65
|
attr_reader :video_count
|
65
66
|
attr_reader :video_from
|
66
67
|
attr_reader :video_to
|
67
68
|
|
69
|
+
Relevance = 'relevance'
|
70
|
+
DateAdded = 'video_date_uploaded'
|
71
|
+
ViewCount = 'video_view_count'
|
72
|
+
Rating = 'video_avg_rating'
|
73
|
+
|
68
74
|
@@youtube_search_base_url = "http://www.youtube.com/results?search_query="
|
69
75
|
|
70
76
|
# Create Youtube::SearchResultScraper object specifying keyword and number of page.
|
@@ -75,16 +81,19 @@ module Youtube #:nodoc:
|
|
75
81
|
# * keyword - specify keyword that you want to search on YouTube.
|
76
82
|
# You must specify keyword encoded by UTF-8.
|
77
83
|
# * page - specify number of page
|
84
|
+
# * sort - specify sort rule
|
78
85
|
|
79
|
-
def initialize keyword, page=nil
|
86
|
+
def initialize keyword, page=nil, sort=nil
|
80
87
|
@keyword = keyword
|
81
88
|
@page = page if not page == nil
|
89
|
+
@sort = sort if not sort == nil
|
82
90
|
end
|
83
91
|
|
84
92
|
# Get search result from youtube by specified keyword.
|
85
93
|
def open
|
86
94
|
@url = @@youtube_search_base_url + CGI.escape(@keyword)
|
87
95
|
@url += "&page=#{@page}" if not @page == nil
|
96
|
+
@url += "&search_sort=#{@sort}" if not @sort == nil
|
88
97
|
@html = Kernel.open(@url).read
|
89
98
|
replace_document_write_javascript
|
90
99
|
@search_result = Hpricot.parse(@html)
|
@@ -106,6 +115,7 @@ module Youtube #:nodoc:
|
|
106
115
|
video.view_count = scrape_view_count(video_html)
|
107
116
|
video.thumbnail_url = scrape_thumbnail_url(video_html)
|
108
117
|
video.tags = scrape_tags(video_html)
|
118
|
+
video.upload_time = scrape_upload_time(video_html)
|
109
119
|
video.url = scrape_url(video_html)
|
110
120
|
|
111
121
|
check_video video
|
@@ -189,6 +199,22 @@ module Youtube #:nodoc:
|
|
189
199
|
tags.join(" ")
|
190
200
|
end
|
191
201
|
|
202
|
+
def scrape_upload_time video_html
|
203
|
+
if video_html.search("div[@class='vfacets']").inner_html =~ /.*Added:<\/span>\s*(\d+)\s*(hour|day|week|month|year).*/m
|
204
|
+
if $2 == "hour"
|
205
|
+
Time.now - $1.to_i * 60 * 60
|
206
|
+
elsif $2 == "day"
|
207
|
+
Time.now - $1.to_i * 60 * 60 * 24
|
208
|
+
elsif $2 == "week"
|
209
|
+
Time.now - $1.to_i * 60 * 60 * 24 * 7
|
210
|
+
elsif $2 == "month"
|
211
|
+
Time.now - $1.to_i * 60 * 60 * 24 * 30
|
212
|
+
elsif $2 == "year"
|
213
|
+
Time.now - $1.to_i * 60 * 60 * 24 * 30 * 12
|
214
|
+
end
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
192
218
|
def scrape_thumbnail_url video_html
|
193
219
|
video_html.search("img[@class='vimg120']").to_html.sub(/.*src="(.*?)".*/, '\1')
|
194
220
|
end
|
data/lib/youtube/video.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#--
|
2
|
-
# Copyright (C) 2006 by in3c.org
|
3
|
-
# http://in3c.org/
|
2
|
+
# Copyright (C) 2006 by in3c.org
|
3
|
+
# http://in3c.org/
|
4
4
|
#
|
5
5
|
# Permission is hereby granted, free of charge, to any person obtaining
|
6
6
|
# a copy of this software and associated documentation files (the
|
@@ -36,13 +36,14 @@ module Youtube
|
|
36
36
|
attr_accessor :upload_time
|
37
37
|
attr_accessor :comment_count
|
38
38
|
attr_accessor :tags
|
39
|
+
attr_accessor :upload_time
|
39
40
|
attr_accessor :url
|
40
41
|
attr_accessor :thumbnail_url
|
41
42
|
|
42
43
|
# Return self information as XML format.
|
43
44
|
def to_xml
|
44
45
|
xml = "<video>\n"
|
45
|
-
instance_variables.each do |attr|
|
46
|
+
instance_variables.each do |attr|
|
46
47
|
value = instance_variable_get(attr).to_s
|
47
48
|
value.gsub!(/<br \/>/, "\n")
|
48
49
|
value.gsub!(/<.*?>/m, '')
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: youtubescraper
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.0.8
|
7
|
-
date: 2007-03-
|
6
|
+
version: 0.0.9
|
7
|
+
date: 2007-03-30 00:00:00 +09:00
|
8
8
|
summary: This gem provide function to scrape html of search result on youtube
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -35,7 +35,6 @@ files:
|
|
35
35
|
- lib/youtube/searchresultscraper.rb
|
36
36
|
- test/youtube_scraper_test.rb
|
37
37
|
- test/html
|
38
|
-
- test/youtube_scraper_test.rb~
|
39
38
|
- test/html/scraping_error.html
|
40
39
|
- test/html/dataY_noMsgY.htm
|
41
40
|
- test/html/dataN_noMsgN.htm
|
data/test/youtube_scraper_test.rb~
DELETED
@@ -1,92 +0,0 @@
|
|
1
|
-
#!/usr/bin/ruby
|
2
|
-
|
3
|
-
#require "runit/testcase"
|
4
|
-
#require "runit/cui/testrunner"
|
5
|
-
require 'test/unit'
|
6
|
-
|
7
|
-
|
8
|
-
require "rubygems"
|
9
|
-
require 'hpricot'
|
10
|
-
require "youtube/searchresultscraper"
|
11
|
-
|
12
|
-
class SearchResultScraperTest < Test::Unit::TestCase
|
13
|
-
#class SearchResultScraperTest < RUNIT::TestCase
|
14
|
-
|
15
|
-
def test_scrape
|
16
|
-
|
17
|
-
#
|
18
|
-
#�����
|
19
|
-
#
|
20
|
-
#������̤�����
|
21
|
-
open_and_scrape("http://www.youtube.com/results?search_query=", "doraemon", 2)
|
22
|
-
#������̤��ʤ�
|
23
|
-
open_and_scrape("http://www.youtube.com/results?search_query=", "aeudyr jahafudfhadf ahf", 2)
|
24
|
-
|
25
|
-
#
|
26
|
-
#�۾��
|
27
|
-
#
|
28
|
-
#������̤����뤬��Not Found��������
|
29
|
-
begin
|
30
|
-
open_local_file_and_scrape("html/dataY_noMsgY.htm")
|
31
|
-
assert_fail("������٤����顼��ȯ�����Ƥ��ʤ�")
|
32
|
-
rescue RuntimeError =>e
|
33
|
-
#puts e
|
34
|
-
end
|
35
|
-
#������̤��ʤ���Not Found��ʤ����
|
36
|
-
begin
|
37
|
-
open_local_file_and_scrape("html/dataN_noMsgN.htm")
|
38
|
-
assert_fail("������٤����顼��ȯ�����Ƥ��ʤ�")
|
39
|
-
rescue RuntimeError
|
40
|
-
#puts e
|
41
|
-
end
|
42
|
-
|
43
|
-
#�����ι��ܤ��������Ǥ��Ƥ��ʤ����
|
44
|
-
begin
|
45
|
-
open_local_file_and_scrape("html/scraping_error.html")
|
46
|
-
assert_fail("������٤����顼��ȯ�����Ƥ��ʤ�")
|
47
|
-
rescue RuntimeError => e
|
48
|
-
# puts e
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
def test_scrape_video_count
|
53
|
-
|
54
|
-
scraper = open_and_scrape("http://www.youtube.com/results?search_query=", "doraemon", 2)
|
55
|
-
puts scraper.video_count
|
56
|
-
assert( scraper.video_count > 0 )
|
57
|
-
|
58
|
-
scraper = open_and_scrape("http://www.youtube.com/results?search_query=", "doraemonifdadfa", 2)
|
59
|
-
puts scraper.video_count
|
60
|
-
assert( scraper.video_count == 0 )
|
61
|
-
end
|
62
|
-
|
63
|
-
def open_and_scrape url, keyword=nil, page=nil
|
64
|
-
scraper = MySearchResultScraper.new(url, keyword, page)
|
65
|
-
scraper.open
|
66
|
-
scraper.scrape
|
67
|
-
scraper
|
68
|
-
end
|
69
|
-
|
70
|
-
def open_local_file_and_scrape url
|
71
|
-
scraper = MySearchResultScraper.new(url)
|
72
|
-
scraper.open_local_file
|
73
|
-
scraper.scrape
|
74
|
-
end
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
class MySearchResultScraper < Youtube::SearchResultScraper
|
79
|
-
@@youtube_search_base_url = "http://www.youtube.com/results?search_query="
|
80
|
-
|
81
|
-
def initialize url, keyword=nil, page=nil
|
82
|
-
@@youtube_search_base_url = url
|
83
|
-
@keyword = keyword
|
84
|
-
@page = page if not page == nil
|
85
|
-
end
|
86
|
-
|
87
|
-
def open_local_file
|
88
|
-
@html = Kernel.open(@@youtube_search_base_url).read
|
89
|
-
replace_document_write_javascript
|
90
|
-
@search_result = Hpricot.parse(@html)
|
91
|
-
end
|
92
|
-
end
|