youtubescraper 0.0.8 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -25,3 +25,7 @@
25
25
 
26
26
  0.0.8 2007-03-29
27
27
  Description is not required.
28
+
29
+ 0.0.9 2007-03-30
30
+ Add sort function for Youtube::SearchResultScraper
31
+ Scrape upload_time from search result page
@@ -61,10 +61,16 @@ module Youtube #:nodoc:
61
61
 
62
62
  attr_accessor :keyword
63
63
  attr_accessor :page
64
+ attr_accessor :sort
64
65
  attr_reader :video_count
65
66
  attr_reader :video_from
66
67
  attr_reader :video_to
67
68
 
69
+ Relevance = 'relevance'
70
+ DateAdded = 'video_date_uploaded'
71
+ ViewCount = 'video_view_count'
72
+ Rating = 'video_avg_rating'
73
+
68
74
  @@youtube_search_base_url = "http://www.youtube.com/results?search_query="
69
75
 
70
76
  # Create Youtube::SearchResultScraper object specifying keyword and number of page.
@@ -75,16 +81,19 @@ module Youtube #:nodoc:
75
81
  # * keyword - specify keyword that you want to search on YouTube.
76
82
  # You must specify keyword encoded by UTF-8.
77
83
  # * page - specify number of page
84
+ # * sort - specify sort rule
78
85
 
79
- def initialize keyword, page=nil
86
+ def initialize keyword, page=nil, sort=nil
80
87
  @keyword = keyword
81
88
  @page = page if not page == nil
89
+ @sort = sort if not sort == nil
82
90
  end
83
91
 
84
92
  # Get search result from youtube by specified keyword.
85
93
  def open
86
94
  @url = @@youtube_search_base_url + CGI.escape(@keyword)
87
95
  @url += "&page=#{@page}" if not @page == nil
96
+ @url += "&search_sort=#{@sort}" if not @sort == nil
88
97
  @html = Kernel.open(@url).read
89
98
  replace_document_write_javascript
90
99
  @search_result = Hpricot.parse(@html)
@@ -106,6 +115,7 @@ module Youtube #:nodoc:
106
115
  video.view_count = scrape_view_count(video_html)
107
116
  video.thumbnail_url = scrape_thumbnail_url(video_html)
108
117
  video.tags = scrape_tags(video_html)
118
+ video.upload_time = scrape_upload_time(video_html)
109
119
  video.url = scrape_url(video_html)
110
120
 
111
121
  check_video video
@@ -189,6 +199,22 @@ module Youtube #:nodoc:
189
199
  tags.join(" ")
190
200
  end
191
201
 
202
+ def scrape_upload_time video_html
203
+ if video_html.search("div[@class='vfacets']").inner_html =~ /.*Added:<\/span>\s*(\d+)\s*(hour|day|week|month|year).*/m
204
+ if $2 == "hour"
205
+ Time.now - $1.to_i * 60 * 60
206
+ elsif $2 == "day"
207
+ Time.now - $1.to_i * 60 * 60 * 24
208
+ elsif $2 == "week"
209
+ Time.now - $1.to_i * 60 * 60 * 24 * 7
210
+ elsif $2 == "month"
211
+ Time.now - $1.to_i * 60 * 60 * 24 * 30
212
+ elsif $2 == "year"
213
+ Time.now - $1.to_i * 60 * 60 * 24 * 30 * 12
214
+ end
215
+ end
216
+ end
217
+
192
218
  def scrape_thumbnail_url video_html
193
219
  video_html.search("img[@class='vimg120']").to_html.sub(/.*src="(.*?)".*/, '\1')
194
220
  end
data/lib/youtube/video.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  #--
2
- # Copyright (C) 2006 by in3c.org
3
- # http://in3c.org/
2
+ # Copyright (C) 2006 by in3c.org
3
+ # http://in3c.org/
4
4
  #
5
5
  # Permission is hereby granted, free of charge, to any person obtaining
6
6
  # a copy of this software and associated documentation files (the
@@ -36,13 +36,14 @@ module Youtube
36
36
  attr_accessor :upload_time
37
37
  attr_accessor :comment_count
38
38
  attr_accessor :tags
39
+ attr_accessor :upload_time
39
40
  attr_accessor :url
40
41
  attr_accessor :thumbnail_url
41
42
 
42
43
  # Return self information as XML format.
43
44
  def to_xml
44
45
  xml = "<video>\n"
45
- instance_variables.each do |attr|
46
+ instance_variables.each do |attr|
46
47
  value = instance_variable_get(attr).to_s
47
48
  value.gsub!(/<br \/>/, "\n")
48
49
  value.gsub!(/<.*?>/m, '')
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: youtubescraper
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.0.8
7
- date: 2007-03-29 00:00:00 +09:00
6
+ version: 0.0.9
7
+ date: 2007-03-30 00:00:00 +09:00
8
8
  summary: This gem provide function to scrape html of search result on youtube
9
9
  require_paths:
10
10
  - lib
@@ -35,7 +35,6 @@ files:
35
35
  - lib/youtube/searchresultscraper.rb
36
36
  - test/youtube_scraper_test.rb
37
37
  - test/html
38
- - test/youtube_scraper_test.rb~
39
38
  - test/html/scraping_error.html
40
39
  - test/html/dataY_noMsgY.htm
41
40
  - test/html/dataN_noMsgN.htm
@@ -1,92 +0,0 @@
1
- #!/usr/bin/ruby
2
-
3
- #require "runit/testcase"
4
- #require "runit/cui/testrunner"
5
- require 'test/unit'
6
-
7
-
8
- require "rubygems"
9
- require 'hpricot'
10
- require "youtube/searchresultscraper"
11
-
12
- class SearchResultScraperTest < Test::Unit::TestCase
13
- #class SearchResultScraperTest < RUNIT::TestCase
14
-
15
- def test_scrape
16
-
17
- #
18
- #�����
19
- #
20
- #������̤�����
21
- open_and_scrape("http://www.youtube.com/results?search_query=", "doraemon", 2)
22
- #������̤��ʤ�
23
- open_and_scrape("http://www.youtube.com/results?search_query=", "aeudyr jahafudfhadf ahf", 2)
24
-
25
- #
26
- #�۾��
27
- #
28
- #������̤����뤬��Not Found��������
29
- begin
30
- open_local_file_and_scrape("html/dataY_noMsgY.htm")
31
- assert_fail("������٤����顼��ȯ�����Ƥ��ʤ�")
32
- rescue RuntimeError =>e
33
- #puts e
34
- end
35
- #������̤��ʤ���Not Found��ʤ����
36
- begin
37
- open_local_file_and_scrape("html/dataN_noMsgN.htm")
38
- assert_fail("������٤����顼��ȯ�����Ƥ��ʤ�")
39
- rescue RuntimeError
40
- #puts e
41
- end
42
-
43
- #�����ι��ܤ��������Ǥ��Ƥ��ʤ����
44
- begin
45
- open_local_file_and_scrape("html/scraping_error.html")
46
- assert_fail("������٤����顼��ȯ�����Ƥ��ʤ�")
47
- rescue RuntimeError => e
48
- # puts e
49
- end
50
- end
51
-
52
- def test_scrape_video_count
53
-
54
- scraper = open_and_scrape("http://www.youtube.com/results?search_query=", "doraemon", 2)
55
- puts scraper.video_count
56
- assert( scraper.video_count > 0 )
57
-
58
- scraper = open_and_scrape("http://www.youtube.com/results?search_query=", "doraemonifdadfa", 2)
59
- puts scraper.video_count
60
- assert( scraper.video_count == 0 )
61
- end
62
-
63
- def open_and_scrape url, keyword=nil, page=nil
64
- scraper = MySearchResultScraper.new(url, keyword, page)
65
- scraper.open
66
- scraper.scrape
67
- scraper
68
- end
69
-
70
- def open_local_file_and_scrape url
71
- scraper = MySearchResultScraper.new(url)
72
- scraper.open_local_file
73
- scraper.scrape
74
- end
75
-
76
- end
77
-
78
- class MySearchResultScraper < Youtube::SearchResultScraper
79
- @@youtube_search_base_url = "http://www.youtube.com/results?search_query="
80
-
81
- def initialize url, keyword=nil, page=nil
82
- @@youtube_search_base_url = url
83
- @keyword = keyword
84
- @page = page if not page == nil
85
- end
86
-
87
- def open_local_file
88
- @html = Kernel.open(@@youtube_search_base_url).read
89
- replace_document_write_javascript
90
- @search_result = Hpricot.parse(@html)
91
- end
92
- end