youtubescraper 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -25,3 +25,7 @@
25
25
 
26
26
  0.0.8 2007-03-29
27
27
  Description is not required.
28
+
29
+ 0.0.9 2007-03-30
30
+ Add sort function for Youtube::SearchResultScraper
31
+ Scrape upload_time from search result page
@@ -61,10 +61,16 @@ module Youtube #:nodoc:
61
61
 
62
62
  attr_accessor :keyword
63
63
  attr_accessor :page
64
+ attr_accessor :sort
64
65
  attr_reader :video_count
65
66
  attr_reader :video_from
66
67
  attr_reader :video_to
67
68
 
69
+ Relevance = 'relevance'
70
+ DateAdded = 'video_date_uploaded'
71
+ ViewCount = 'video_view_count'
72
+ Rating = 'video_avg_rating'
73
+
68
74
  @@youtube_search_base_url = "http://www.youtube.com/results?search_query="
69
75
 
70
76
  # Create Youtube::SearchResultScraper object specifying keyword and number of page.
@@ -75,16 +81,19 @@ module Youtube #:nodoc:
75
81
  # * keyword - specify keyword that you want to search on YouTube.
76
82
  # You must specify keyword encoded by UTF-8.
77
83
  # * page - specify number of page
84
+ # * sort - specify sort rule
78
85
 
79
- def initialize keyword, page=nil
86
# Create a Youtube::SearchResultScraper for the given search parameters.
#
# * keyword - keyword to search for on YouTube (UTF-8 encoded).
# * page    - number of the result page; @page is left unset when nil.
# * sort    - sort rule; @sort is left unset when nil.
def initialize keyword, page=nil, sort=nil
  @keyword = keyword
  @page = page unless page.nil?
  @sort = sort unless sort.nil?
end
83
91
 
84
92
  # Get search result from youtube by specified keyword.
85
93
  def open
86
94
  @url = @@youtube_search_base_url + CGI.escape(@keyword)
87
95
  @url += "&page=#{@page}" if not @page == nil
96
+ @url += "&search_sort=#{@sort}" if not @sort == nil
88
97
  @html = Kernel.open(@url).read
89
98
  replace_document_write_javascript
90
99
  @search_result = Hpricot.parse(@html)
@@ -106,6 +115,7 @@ module Youtube #:nodoc:
106
115
  video.view_count = scrape_view_count(video_html)
107
116
  video.thumbnail_url = scrape_thumbnail_url(video_html)
108
117
  video.tags = scrape_tags(video_html)
118
+ video.upload_time = scrape_upload_time(video_html)
109
119
  video.url = scrape_url(video_html)
110
120
 
111
121
  check_video video
@@ -189,6 +199,22 @@ module Youtube #:nodoc:
189
199
  tags.join(" ")
190
200
  end
191
201
 
202
# Estimate when a video was uploaded from the relative "Added:" label
# (for example "3 days ago") inside a search-result entry.
#
# * video_html - element for one search result; it must respond to
#   search, and the result of search must respond to inner_html.
#
# Returns a Time equal to Time.now minus the stated age, or nil when the
# "Added:" label is missing or uses a unit other than hour, day, week,
# month or year. The value is approximate: a month counts as 30 days and
# a year as 365 days.
def scrape_upload_time video_html
  seconds_per_unit = {
    "hour"  => 60 * 60,
    "day"   => 60 * 60 * 24,
    "week"  => 60 * 60 * 24 * 7,
    "month" => 60 * 60 * 24 * 30,
    # 365 days, not 30 * 12 = 360, so multi-year ages do not drift.
    "year"  => 60 * 60 * 24 * 365
  }

  facets = video_html.search("div[@class='vfacets']").inner_html
  return nil unless facets =~ /.*Added:<\/span>\s*(\d+)\s*(hour|day|week|month|year).*/m

  amount = Regexp.last_match(1).to_i
  unit = Regexp.last_match(2)
  Time.now - amount * seconds_per_unit[unit]
end
217
+
192
218
  def scrape_thumbnail_url video_html
193
219
  video_html.search("img[@class='vimg120']").to_html.sub(/.*src="(.*?)".*/, '\1')
194
220
  end
data/lib/youtube/video.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  #--
2
- # Copyright (C) 2006 by in3c.org
3
- # http://in3c.org/
2
+ # Copyright (C) 2006 by in3c.org
3
+ # http://in3c.org/
4
4
  #
5
5
  # Permission is hereby granted, free of charge, to any person obtaining
6
6
  # a copy of this software and associated documentation files (the
@@ -36,13 +36,14 @@ module Youtube
36
36
  attr_accessor :upload_time
37
37
  attr_accessor :comment_count
38
38
  attr_accessor :tags
39
+ attr_accessor :upload_time
39
40
  attr_accessor :url
40
41
  attr_accessor :thumbnail_url
41
42
 
42
43
  # Return self information as XML format.
43
44
  def to_xml
44
45
  xml = "<video>\n"
45
- instance_variables.each do |attr|
46
+ instance_variables.each do |attr|
46
47
  value = instance_variable_get(attr).to_s
47
48
  value.gsub!(/<br \/>/, "\n")
48
49
  value.gsub!(/<.*?>/m, '')
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: youtubescraper
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.0.8
7
- date: 2007-03-29 00:00:00 +09:00
6
+ version: 0.0.9
7
+ date: 2007-03-30 00:00:00 +09:00
8
8
  summary: This gem provide function to scrape html of search result on youtube
9
9
  require_paths:
10
10
  - lib
@@ -35,7 +35,6 @@ files:
35
35
  - lib/youtube/searchresultscraper.rb
36
36
  - test/youtube_scraper_test.rb
37
37
  - test/html
38
- - test/youtube_scraper_test.rb~
39
38
  - test/html/scraping_error.html
40
39
  - test/html/dataY_noMsgY.htm
41
40
  - test/html/dataN_noMsgN.htm
@@ -1,92 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- #require "runit/testcase"
4
- #require "runit/cui/testrunner"
5
- require 'test/unit'
6
-
7
-
8
- require "rubygems"
9
- require 'hpricot'
10
- require "youtube/searchresultscraper"
11
-
12
- class SearchResultScraperTest < Test::Unit::TestCase
13
- #class SearchResultScraperTest < RUNIT::TestCase
14
-
15
- def test_scrape
16
-
17
- #
18
- #正常系 (normal cases)
19
- #
20
- #検索結果がある (search results exist)
21
- open_and_scrape("http://www.youtube.com/results?search_query=", "doraemon", 2)
22
- #検索結果がない (no search results)
23
- open_and_scrape("http://www.youtube.com/results?search_query=", "aeudyr jahafudfhadf ahf", 2)
24
-
25
- #
26
- #異常系 (abnormal cases)
27
- #
28
- #������̤����뤬��Not Found��������
29
- begin
30
- open_local_file_and_scrape("html/dataY_noMsgY.htm")
31
- assert_fail("������٤����顼��ȯ�����Ƥ��ʤ�")
32
- rescue RuntimeError =>e
33
- #puts e
34
- end
35
- #������̤��ʤ���Not Found��ʤ����
36
- begin
37
- open_local_file_and_scrape("html/dataN_noMsgN.htm")
38
- assert_fail("������٤����顼��ȯ�����Ƥ��ʤ�")
39
- rescue RuntimeError
40
- #puts e
41
- end
42
-
43
- #�����ι��ܤ��������Ǥ��Ƥ��ʤ����
44
- begin
45
- open_local_file_and_scrape("html/scraping_error.html")
46
- assert_fail("������٤����顼��ȯ�����Ƥ��ʤ�")
47
- rescue RuntimeError => e
48
- # puts e
49
- end
50
- end
51
-
52
- def test_scrape_video_count
53
-
54
- scraper = open_and_scrape("http://www.youtube.com/results?search_query=", "doraemon", 2)
55
- puts scraper.video_count
56
- assert( scraper.video_count > 0 )
57
-
58
- scraper = open_and_scrape("http://www.youtube.com/results?search_query=", "doraemonifdadfa", 2)
59
- puts scraper.video_count
60
- assert( scraper.video_count == 0 )
61
- end
62
-
63
- def open_and_scrape url, keyword=nil, page=nil
64
- scraper = MySearchResultScraper.new(url, keyword, page)
65
- scraper.open
66
- scraper.scrape
67
- scraper
68
- end
69
-
70
- def open_local_file_and_scrape url
71
- scraper = MySearchResultScraper.new(url)
72
- scraper.open_local_file
73
- scraper.scrape
74
- end
75
-
76
- end
77
-
78
- class MySearchResultScraper < Youtube::SearchResultScraper
79
- @@youtube_search_base_url = "http://www.youtube.com/results?search_query="
80
-
81
- def initialize url, keyword=nil, page=nil
82
- @@youtube_search_base_url = url
83
- @keyword = keyword
84
- @page = page if not page == nil
85
- end
86
-
87
- def open_local_file
88
- @html = Kernel.open(@@youtube_search_base_url).read
89
- replace_document_write_javascript
90
- @search_result = Hpricot.parse(@html)
91
- end
92
- end