google-video 0.5.0 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +21 -0
- data/Rakefile +1 -1
- data/lib/google-video.rb +99 -58
- data/test/test_video_details.rb +11 -2
- metadata +2 -2
data/CHANGELOG
CHANGED
@@ -1,3 +1,24 @@
|
|
1
|
+
* 2006/11/16
|
2
|
+
|
3
|
+
- [shaper] renamed Video#yesterday_view_count to
|
4
|
+
Video#view_count_yesterday for consistency, and aliased to maintain
|
5
|
+
backwards-compatibility.
|
6
|
+
|
7
|
+
- [shaper] Updated to reflect video detail page edits:
|
8
|
+
+ parse new metrics on email and embed count/rank for today and yesterday
|
9
|
+
+ correct parsing to accurately grab from new layout of rank and view
|
10
|
+
count for today and yesterday
|
11
|
+
+ turned Video#rank_delta into a method that returns the delta between
|
12
|
+
yesterday and today's rank.
|
13
|
+
|
14
|
+
- [shaper] Updated hpricot usage for brevity: use Element#[] for referencing
|
15
|
+
attributes and Element#% for slicing out elements where we know there
|
16
|
+
can be only one.
|
17
|
+
|
18
|
+
- [shaper] Fixed typo in rdoc for VideoSearchResponse.
|
19
|
+
|
20
|
+
- [shaper] Optimized string building for URLs and the like.
|
21
|
+
|
1
22
|
* 2006/11/07
|
2
23
|
|
3
24
|
- [shaper] Initial version.
|
data/Rakefile
CHANGED
data/lib/google-video.rb
CHANGED
@@ -64,11 +64,18 @@ module GoogleVideo
|
|
64
64
|
# only available via a details request: the current rank of this video on
|
65
65
|
# the site.
|
66
66
|
attr_reader :rank
|
67
|
-
|
68
|
-
# only available via a details request: the
|
69
|
-
#
|
70
|
-
|
71
|
-
|
67
|
+
|
68
|
+
# only available via a details request: the rank of this video on the site
|
69
|
+
# yesterday.
|
70
|
+
attr_reader :rank_yesterday
|
71
|
+
|
72
|
+
# only available via a details request: the current rank of this video on
|
73
|
+
# the site based on email metrics.
|
74
|
+
attr_reader :rank_email
|
75
|
+
|
76
|
+
# only available via a details request: the current rank of this video on
|
77
|
+
# the site based on embedded html metrics.
|
78
|
+
attr_reader :rank_embed
|
72
79
|
|
73
80
|
# the number of ratings this video has received.
|
74
81
|
attr_reader :rating_count
|
@@ -115,7 +122,18 @@ module GoogleVideo
|
|
115
122
|
|
116
123
|
# only available via a details request: the number of views this video
|
117
124
|
# received yesterday.
|
118
|
-
attr_reader :
|
125
|
+
attr_reader :view_count_yesterday
|
126
|
+
|
127
|
+
# same as Video#view_count_yesterday providing backwards-compatibility.
|
128
|
+
alias_method :yesterday_view_count, :view_count_yesterday
|
129
|
+
|
130
|
+
# only available via a details request: the number of views this video
|
131
|
+
# has received based on email metrics.
|
132
|
+
attr_reader :view_count_email
|
133
|
+
|
134
|
+
# only available via a details request: the number of views this video
|
135
|
+
# has received based on embedded html metrics.
|
136
|
+
attr_reader :view_count_embed
|
119
137
|
|
120
138
|
# the default width in pixels for the video embed html.
|
121
139
|
@@DEFAULT_WIDTH = 400
|
@@ -144,6 +162,13 @@ module GoogleVideo
|
|
144
162
|
src="http://video.google.com/googleplayer.swf?docId=#{@doc_id}&hl=en" flashvars=""> </embed>
|
145
163
|
edoc
|
146
164
|
end
|
165
|
+
|
166
|
+
# only available via a details request: the change in rank this video has
|
167
|
+
# seen since yesterday's rank: if positive, a move up, if negative, a move
|
168
|
+
# down.
|
169
|
+
def rank_delta
|
170
|
+
(@rank_yesterday - @rank)
|
171
|
+
end
|
147
172
|
end
|
148
173
|
|
149
174
|
# Provides a miniature snapshot of a point in time within a full video. On
|
@@ -331,7 +356,7 @@ edoc
|
|
331
356
|
end
|
332
357
|
end
|
333
358
|
|
334
|
-
# Describes a response to a
|
359
|
+
# Describes a response to a VideoSearchRequest.
|
335
360
|
class VideoSearchResponse
|
336
361
|
# the url with which the request was made of the Google Video service.
|
337
362
|
attr_reader :request_url
|
@@ -439,7 +464,7 @@ edoc
|
|
439
464
|
|
440
465
|
# parse the overall search query stats
|
441
466
|
regexp_stats = Regexp.new(/([0-9,]+) \- ([0-9,]+)<\/b> of about <b>([0-9,]+)<\/b> \(<b>([0-9.]+)/)
|
442
|
-
row = (doc
|
467
|
+
row = (doc%"#resultsheadertable/tr/td/font")
|
443
468
|
if !regexp_stats.match(row.inner_html)
|
444
469
|
raise GoogleVideoException.new("failed to parse search query stats")
|
445
470
|
end
|
@@ -450,18 +475,18 @@ edoc
|
|
450
475
|
rows = doc/"table[@class='searchresult']/tr"
|
451
476
|
rows.each do |row|
|
452
477
|
# parse the thumbnail image
|
453
|
-
thumbnail_image_url = _decode_html((row
|
478
|
+
thumbnail_image_url = _decode_html((row%"img[@class='searchresultimg']")['src'])
|
454
479
|
|
455
480
|
# parse the title and page url
|
456
|
-
a_title = (row
|
457
|
-
page_url =
|
481
|
+
a_title = (row%"div[@class='resulttitle']/a")
|
482
|
+
page_url = "http://#{@host}/#{_decode_html(a_title['href'])}"
|
458
483
|
title = _decode_html(a_title.inner_html.strip)
|
459
484
|
|
460
485
|
# parse the description text
|
461
|
-
description = _decode_html((row
|
486
|
+
description = _decode_html((row%"div[@class='snippet']").inner_html.strip)
|
462
487
|
|
463
488
|
# parse the upload username
|
464
|
-
span_channel = (row
|
489
|
+
span_channel = (row%"span[@class='channel']")
|
465
490
|
channel_html = (span_channel) ? span_channel.inner_html : ''
|
466
491
|
channel_html =~ /([^\-]+)/
|
467
492
|
upload_user = _clean_string($1)
|
@@ -470,11 +495,11 @@ edoc
|
|
470
495
|
star_count = _parse_star_elements(row/"img[@class='star']")
|
471
496
|
|
472
497
|
# rating count
|
473
|
-
span_raters = (row
|
498
|
+
span_raters = (row%"span[@id='numOfRaters']")
|
474
499
|
rating_count = (span_raters) ? span_raters.inner_html.to_i : 0
|
475
500
|
|
476
501
|
# duration
|
477
|
-
span_date = (row
|
502
|
+
span_date = (row%"span[@class='date']")
|
478
503
|
date_html = span_date.inner_html
|
479
504
|
date_html =~ /([^\-]+) \- (.*)$/
|
480
505
|
duration = _clean_string($1)
|
@@ -519,20 +544,20 @@ edoc
|
|
519
544
|
title = (doc/"div[@id='pvprogtitle']").inner_html.strip
|
520
545
|
|
521
546
|
# parse description
|
522
|
-
font_description = (doc
|
547
|
+
font_description = (doc%"div[@id='description']/font")
|
523
548
|
description = (font_description) ? font_description.all_text.strip : ''
|
524
|
-
span_wholedescr = (doc
|
549
|
+
span_wholedescr = (doc%"span[@id='wholedescr']")
|
525
550
|
if (span_wholedescr)
|
526
551
|
description += ' ' + span_wholedescr.all_text.strip
|
527
552
|
end
|
528
553
|
description = _decode_html(description)
|
529
554
|
|
530
555
|
# parse star count
|
531
|
-
span_rating = (doc
|
556
|
+
span_rating = (doc%"span[@id='communityRating']")
|
532
557
|
star_count = _parse_star_elements(span_rating/"img[@class='star']")
|
533
558
|
|
534
559
|
# parse rating count
|
535
|
-
span_raters = (doc
|
560
|
+
span_raters = (doc%"span[@id='numOfRaters']")
|
536
561
|
rating_count = (span_raters) ? span_raters.inner_html.to_i : 0
|
537
562
|
|
538
563
|
# parse upload user, duration, upload date, upload user domain, upload
|
@@ -542,7 +567,7 @@ edoc
|
|
542
567
|
# rows of text, with only the middle row (in the three-row scenario)
|
543
568
|
# containing duration and upload date, omnipresent. still, we buckle
|
544
569
|
# down and have at it with fervor and tenacity.
|
545
|
-
duration_etc_html = (doc
|
570
|
+
duration_etc_html = (doc%"div[@id='durationetc']").inner_html
|
546
571
|
duration_parts = duration_etc_html.split(/<br[^>]+>/)
|
547
572
|
# see if the first line looks like it has a date formatted ala 'Nov 9, 2006'
|
548
573
|
if (duration_parts[0] =~ /\- [A-Za-z]{3} \d+, \d{4}/)
|
@@ -563,7 +588,7 @@ edoc
|
|
563
588
|
|
564
589
|
# parse the upload user url and domain if present
|
565
590
|
if (upload_user_domain =~ /<a.*?href="([^"]+)"[^>]+>([^<]+)<\/a>/)
|
566
|
-
upload_user_url =
|
591
|
+
upload_user_url = "http://#{@host}#{_decode_html(_clean_string($1))}"
|
567
592
|
upload_user_domain = _clean_string($2)
|
568
593
|
else
|
569
594
|
upload_user_url = ''
|
@@ -571,28 +596,21 @@ edoc
|
|
571
596
|
end
|
572
597
|
|
573
598
|
# pull out view count and rank info table row elements
|
574
|
-
|
575
|
-
|
576
|
-
# parse view count
|
577
|
-
tr_total_views.inner_html =~ /<b>([^<]+)<\/b>/
|
578
|
-
view_count = _human_number_to_int($1)
|
579
|
-
|
580
|
-
# parse yesterday's view count
|
581
|
-
tr_views_yesterday.inner_html =~ /\s+([0-9,]+) yesterday/
|
582
|
-
yesterday_view_count = _human_number_to_int($1)
|
599
|
+
tr_statsall = (doc/"div[@id='statsall']/table/tr")
|
583
600
|
|
584
|
-
#
|
585
|
-
|
586
|
-
rank = _human_number_to_int($1)
|
601
|
+
# remove the first row which just contains header info
|
602
|
+
tr_statsall.shift
|
587
603
|
|
588
|
-
# parse rank
|
589
|
-
(
|
590
|
-
|
604
|
+
# parse each of the view count and rank rows
|
605
|
+
(view_count, rank) = _parse_statsall_row(tr_statsall.shift)
|
606
|
+
(view_count_yesterday, rank_yesterday) = _parse_statsall_row(tr_statsall.shift)
|
607
|
+
(view_count_email, rank_email) = _parse_statsall_row(tr_statsall.shift)
|
608
|
+
(view_count_embed, rank_embed) = _parse_statsall_row(tr_statsall.shift)
|
591
609
|
|
592
610
|
# pull out the url to the video .gvp file if present
|
593
|
-
img_download = (doc
|
611
|
+
img_download = (doc%"img[@src='/static/btn_download.gif']")
|
594
612
|
if (img_download)
|
595
|
-
onclick_html = img_download
|
613
|
+
onclick_html = img_download['onclick']
|
596
614
|
onclick_script = _decode_html(onclick_html)
|
597
615
|
onclick_script =~ /onDownloadClick\(([^\)]+)\)/
|
598
616
|
video_file_url = onclick_script.split(",")[1].gsub(/"/, '')
|
@@ -608,7 +626,7 @@ edoc
|
|
608
626
|
|
609
627
|
# pull out the playlist entries
|
610
628
|
playlist_entries = []
|
611
|
-
table_upnext = (doc
|
629
|
+
table_upnext = (doc%"table[@id='upnexttable']")
|
612
630
|
(table_upnext/"tr").each do |tr_playlist|
|
613
631
|
playlist_entries << _parse_playlist_entry(tr_playlist)
|
614
632
|
end
|
@@ -619,7 +637,9 @@ edoc
|
|
619
637
|
:page_url => url,
|
620
638
|
:playlist_entries => playlist_entries,
|
621
639
|
:rank => rank,
|
622
|
-
:
|
640
|
+
:rank_yesterday => rank_yesterday,
|
641
|
+
:rank_email => rank_email,
|
642
|
+
:rank_embed => rank_embed,
|
623
643
|
:rating_count => rating_count,
|
624
644
|
:star_count => star_count,
|
625
645
|
:title => title,
|
@@ -630,7 +650,9 @@ edoc
|
|
630
650
|
:video_file_url => video_file_url,
|
631
651
|
:video_frame_thumbnails => video_frame_thumbnails,
|
632
652
|
:view_count => view_count,
|
633
|
-
:
|
653
|
+
:view_count_yesterday => view_count_yesterday,
|
654
|
+
:view_count_email => view_count_email,
|
655
|
+
:view_count_embed => view_count_embed)
|
634
656
|
|
635
657
|
# build and return the response
|
636
658
|
VideoDetailsResponse.new(:request_url => url, :video => video)
|
@@ -675,11 +697,11 @@ edoc
|
|
675
697
|
rank_yesterday = td_rank_yesterday.inner_html.to_i
|
676
698
|
|
677
699
|
# parse the video thumbnail image
|
678
|
-
thumbnail_image_url = _decode_html((td_thumbnail
|
700
|
+
thumbnail_image_url = _decode_html((td_thumbnail%"a/img")['src'])
|
679
701
|
|
680
702
|
# parse the detailed video info
|
681
|
-
a_video = (td_detail
|
682
|
-
page_url =
|
703
|
+
a_video = (td_detail%"a")
|
704
|
+
page_url = "http://#{@host}#{a_video['href']}"
|
683
705
|
|
684
706
|
# title
|
685
707
|
title = _decode_html(a_video.inner_html.strip)
|
@@ -688,11 +710,11 @@ edoc
|
|
688
710
|
star_count = _parse_star_elements(td_detail/"div[@class='meta']/span/font/img[@class='star']")
|
689
711
|
|
690
712
|
# rating count
|
691
|
-
span_raters = (td_detail
|
713
|
+
span_raters = (td_detail%"div[@class='meta']/span/font/span[@id='numOfRaters']")
|
692
714
|
rating_count = (span_raters) ? span_raters.inner_html.to_i : 0
|
693
715
|
|
694
716
|
# duration
|
695
|
-
duration = (td_detail
|
717
|
+
duration = (td_detail%"div[@class='meta']").all_text.gsub(/ /, '').strip
|
696
718
|
|
697
719
|
# description
|
698
720
|
description = _decode_html((td_detail).all_text.strip)
|
@@ -718,12 +740,31 @@ edoc
|
|
718
740
|
end
|
719
741
|
|
720
742
|
private
|
743
|
+
|
744
|
+
# Breakout method used by Client#video_details to make things more
|
745
|
+
# manageable, returning a tuple as (view count, rank) for the supplied
|
746
|
+
# data row within the "statsall" div element's statistics table.
|
747
|
+
def _parse_statsall_row (row)
|
748
|
+
td_alltime = row/"td"
|
749
|
+
view_count = _human_number_to_int(_parse_number(td_alltime[1].all_text))
|
750
|
+
rank = _human_number_to_int(_parse_number(td_alltime[2].all_text))
|
751
|
+
[ view_count, rank ]
|
752
|
+
end
|
753
|
+
|
754
|
+
# Given html containing some set of garbage and a human-readable number
|
755
|
+
# somewhere therein, e.g. " 554,200", returns the matched number as a
|
756
|
+
# string (e.g. "554,200"), or nil when no number is found.
|
757
|
+
def _parse_number (html)
|
758
|
+
html =~ /([0-9,\-]+)/
|
759
|
+
$1
|
760
|
+
end
|
761
|
+
|
721
762
|
# Breakout method used by Client#video_details to make things more
|
722
763
|
# manageable, returning a VideoFrameThumbnail constructed from the given
|
723
764
|
# image element.
|
724
765
|
def _parse_video_frame_thumbnail (frame_image)
|
725
|
-
title = frame_image
|
726
|
-
thumbnail_image_url = _decode_html(frame_image
|
766
|
+
title = frame_image['title']
|
767
|
+
thumbnail_image_url = _decode_html(frame_image['src'])
|
727
768
|
VideoFrameThumbnail.new(:title => title, :thumbnail_image_url => thumbnail_image_url)
|
728
769
|
end
|
729
770
|
|
@@ -735,15 +776,15 @@ edoc
|
|
735
776
|
( td_thumbnail, td_playlist ) = (tr_playlist/"td")
|
736
777
|
|
737
778
|
# parse thumbnail image
|
738
|
-
thumbnail_image_url = _decode_html((td_thumbnail
|
779
|
+
thumbnail_image_url = _decode_html((td_thumbnail%"img")['src'])
|
739
780
|
|
740
781
|
# parse the page url and title
|
741
|
-
a_page = (td_playlist
|
742
|
-
page_url =
|
743
|
-
title = _decode_html(a_page
|
782
|
+
a_page = (td_playlist%"a")
|
783
|
+
page_url = "http://#{@host}#{a_page['href']}"
|
784
|
+
title = _decode_html(a_page['title'])
|
744
785
|
|
745
786
|
# parse the upload user and duration
|
746
|
-
meta_html = (td_playlist
|
787
|
+
meta_html = (td_playlist%"span[@class='meta']").inner_html
|
747
788
|
if (meta_html =~ /([^<]+)<br \/>(.*)/)
|
748
789
|
upload_user = _clean_string($1)
|
749
790
|
duration = _clean_string($2)
|
@@ -774,7 +815,7 @@ edoc
|
|
774
815
|
def _parse_star_elements (elements)
|
775
816
|
star_count = 0
|
776
817
|
elements.each do |star|
|
777
|
-
star_src = star
|
818
|
+
star_src = star['src']
|
778
819
|
if (star_src =~ /starLittle\.gif$/)
|
779
820
|
star_count += 1
|
780
821
|
elsif (star_src =~ /starLittleHalf\.gif$/)
|
@@ -793,7 +834,7 @@ edoc
|
|
793
834
|
url = "http://#{@host}/videoranking"
|
794
835
|
if top_request.country
|
795
836
|
country_code = TopVideosRequest::COUNTRIES[top_request.country]
|
796
|
-
url
|
837
|
+
url.concat "?cr=#{country_code}"
|
797
838
|
end
|
798
839
|
url
|
799
840
|
end
|
@@ -801,17 +842,17 @@ edoc
|
|
801
842
|
def _search_url (search_request)
|
802
843
|
query = search_request.query
|
803
844
|
if (search_request.duration && search_request.duration != 'all')
|
804
|
-
query
|
845
|
+
query.concat "+duration:#{search_request.duration}"
|
805
846
|
end
|
806
847
|
|
807
848
|
url = "http://#{@host}/videosearch?q=#{URI.encode(query)}"
|
808
849
|
|
809
850
|
if (search_request.page)
|
810
|
-
url
|
851
|
+
url.concat "&page=#{URI.encode(search_request.page.to_s)}"
|
811
852
|
end
|
812
853
|
|
813
854
|
if (search_request.sort)
|
814
|
-
url
|
855
|
+
url.concat "&so=#{URI.encode(search_request.sort)}"
|
815
856
|
end
|
816
857
|
|
817
858
|
url
|
data/test/test_video_details.rb
CHANGED
@@ -87,10 +87,19 @@ class TestVideoDetails < Test::Unit::TestCase
|
|
87
87
|
|
88
88
|
# poke and prod the video object
|
89
89
|
assert_valid_video video
|
90
|
-
assert (video.view_count >= video.
|
90
|
+
assert (video.view_count >= video.view_count_yesterday)
|
91
|
+
assert (video.view_count >= video.view_count_embed)
|
92
|
+
assert (video.view_count >= video.view_count_email)
|
91
93
|
assert_match /http:\/\/video\.google\.com\/videogvp\/[^\.]+\.gvp\?docid=[0-9\-\+]+/, video.video_file_url
|
92
94
|
assert_kind_of Numeric, video.rank
|
93
|
-
assert_kind_of Numeric, video.
|
95
|
+
assert_kind_of Numeric, video.rank_yesterday
|
96
|
+
assert_kind_of Numeric, video.rank_email
|
97
|
+
assert_kind_of Numeric, video.rank_embed
|
98
|
+
assert_kind_of Numeric, video.view_count
|
99
|
+
assert_kind_of Numeric, video.view_count_yesterday
|
100
|
+
assert_kind_of Numeric, video.view_count_email
|
101
|
+
assert_kind_of Numeric, video.view_count_embed
|
102
|
+
assert_equal video.view_count_yesterday, video.yesterday_view_count
|
94
103
|
assert_instance_of Time, video.upload_date
|
95
104
|
|
96
105
|
# check the video frame thumbnails
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: google-video
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.
|
7
|
-
date: 2006-11-
|
6
|
+
version: 0.6.0
|
7
|
+
date: 2006-11-16 00:00:00 -08:00
|
8
8
|
summary: A Ruby object-oriented interface to Google Video content.
|
9
9
|
require_paths:
|
10
10
|
- lib
|