google-video 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +21 -0
- data/Rakefile +1 -1
- data/lib/google-video.rb +99 -58
- data/test/test_video_details.rb +11 -2
- metadata +2 -2
data/CHANGELOG
CHANGED
@@ -1,3 +1,24 @@
|
|
1
|
+
* 2006/11/16
|
2
|
+
|
3
|
+
- [shaper] renamed Video#yesterday_view_count to
|
4
|
+
Video#view_count_yesterday for consistency, and aliased to maintain
|
5
|
+
backwards-compatibility.
|
6
|
+
|
7
|
+
- [shaper] Updated to reflect video detail page edits:
|
8
|
+
+ parse new metrics on email and embed count/rank for today and yesterday
|
9
|
+
+ correct parsing to accurately grab from new layout of rank and view
|
10
|
+
count for today and yesterday
|
11
|
+
+ turned Video#rank_delta into a method that returns the delta between
|
12
|
+
yesterday and today's rank.
|
13
|
+
|
14
|
+
- [shaper] Updated hpricot usage for brevity: use Element#[] for referencing
|
15
|
+
attributes and Element#% for slicing out elements where we know there
|
16
|
+
can be only one.
|
17
|
+
|
18
|
+
- [shaper] Fixed typo in rdoc for VideoSearchResponse.
|
19
|
+
|
20
|
+
- [shaper] Optimized string building for URLs and the like.
|
21
|
+
|
1
22
|
* 2006/11/07
|
2
23
|
|
3
24
|
- [shaper] Initial version.
|
data/Rakefile
CHANGED
data/lib/google-video.rb
CHANGED
@@ -64,11 +64,18 @@ module GoogleVideo
|
|
64
64
|
# only available via a details request: the current rank of this video on
|
65
65
|
# the site.
|
66
66
|
attr_reader :rank
|
67
|
-
|
68
|
-
# only available via a details request: the
|
69
|
-
#
|
70
|
-
|
71
|
-
|
67
|
+
|
68
|
+
# only available via a details request: the rank of this video on the site
|
69
|
+
# yesterday.
|
70
|
+
attr_reader :rank_yesterday
|
71
|
+
|
72
|
+
# only available via a details request: the current rank of this video on
|
73
|
+
# the site based on email metrics.
|
74
|
+
attr_reader :rank_email
|
75
|
+
|
76
|
+
# only available via a details request: the current rank of this video on
|
77
|
+
# the site based on embedded html metrics.
|
78
|
+
attr_reader :rank_embed
|
72
79
|
|
73
80
|
# the number of ratings this video has received.
|
74
81
|
attr_reader :rating_count
|
@@ -115,7 +122,18 @@ module GoogleVideo
|
|
115
122
|
|
116
123
|
# only available via a details request: the number of views this video
|
117
124
|
# received yesterday.
|
118
|
-
attr_reader :
|
125
|
+
attr_reader :view_count_yesterday
|
126
|
+
|
127
|
+
# same as Video#view_count_yesterday providing backwards-compatibility.
|
128
|
+
alias_method :yesterday_view_count, :view_count_yesterday
|
129
|
+
|
130
|
+
# only available via a details request: the number of views this video
|
131
|
+
# has received based on email metrics.
|
132
|
+
attr_reader :view_count_email
|
133
|
+
|
134
|
+
# only available via a details request: the number of views this video
|
135
|
+
# has received based on embedded html metrics.
|
136
|
+
attr_reader :view_count_embed
|
119
137
|
|
120
138
|
# the default width in pixels for the video embed html.
|
121
139
|
@@DEFAULT_WIDTH = 400
|
@@ -144,6 +162,13 @@ module GoogleVideo
|
|
144
162
|
src="http://video.google.com/googleplayer.swf?docId=#{@doc_id}&hl=en" flashvars=""> </embed>
|
145
163
|
edoc
|
146
164
|
end
|
165
|
+
|
166
|
+
# only available via a details request: the change in rank this video has
|
167
|
+
# seen since yesterday's rank: if positive, a move up, if negative, a move
|
168
|
+
# down.
|
169
|
+
def rank_delta
|
170
|
+
(@rank_yesterday - @rank)
|
171
|
+
end
|
147
172
|
end
|
148
173
|
|
149
174
|
# Provides a miniature snapshot of a point in time within a full video. On
|
@@ -331,7 +356,7 @@ edoc
|
|
331
356
|
end
|
332
357
|
end
|
333
358
|
|
334
|
-
# Describes a response to a
|
359
|
+
# Describes a response to a VideoSearchRequest.
|
335
360
|
class VideoSearchResponse
|
336
361
|
# the url with which the request was made of the Google Video service.
|
337
362
|
attr_reader :request_url
|
@@ -439,7 +464,7 @@ edoc
|
|
439
464
|
|
440
465
|
# parse the overall search query stats
|
441
466
|
regexp_stats = Regexp.new(/([0-9,]+) \- ([0-9,]+)<\/b> of about <b>([0-9,]+)<\/b> \(<b>([0-9.]+)/)
|
442
|
-
row = (doc
|
467
|
+
row = (doc%"#resultsheadertable/tr/td/font")
|
443
468
|
if !regexp_stats.match(row.inner_html)
|
444
469
|
raise GoogleVideoException.new("failed to parse search query stats")
|
445
470
|
end
|
@@ -450,18 +475,18 @@ edoc
|
|
450
475
|
rows = doc/"table[@class='searchresult']/tr"
|
451
476
|
rows.each do |row|
|
452
477
|
# parse the thumbnail image
|
453
|
-
thumbnail_image_url = _decode_html((row
|
478
|
+
thumbnail_image_url = _decode_html((row%"img[@class='searchresultimg']")['src'])
|
454
479
|
|
455
480
|
# parse the title and page url
|
456
|
-
a_title = (row
|
457
|
-
page_url =
|
481
|
+
a_title = (row%"div[@class='resulttitle']/a")
|
482
|
+
page_url = "http://#{@host}/#{_decode_html(a_title['href'])}"
|
458
483
|
title = _decode_html(a_title.inner_html.strip)
|
459
484
|
|
460
485
|
# parse the description text
|
461
|
-
description = _decode_html((row
|
486
|
+
description = _decode_html((row%"div[@class='snippet']").inner_html.strip)
|
462
487
|
|
463
488
|
# parse the upload username
|
464
|
-
span_channel = (row
|
489
|
+
span_channel = (row%"span[@class='channel']")
|
465
490
|
channel_html = (span_channel) ? span_channel.inner_html : ''
|
466
491
|
channel_html =~ /([^\-]+)/
|
467
492
|
upload_user = _clean_string($1)
|
@@ -470,11 +495,11 @@ edoc
|
|
470
495
|
star_count = _parse_star_elements(row/"img[@class='star']")
|
471
496
|
|
472
497
|
# rating count
|
473
|
-
span_raters = (row
|
498
|
+
span_raters = (row%"span[@id='numOfRaters']")
|
474
499
|
rating_count = (span_raters) ? span_raters.inner_html.to_i : 0
|
475
500
|
|
476
501
|
# duration
|
477
|
-
span_date = (row
|
502
|
+
span_date = (row%"span[@class='date']")
|
478
503
|
date_html = span_date.inner_html
|
479
504
|
date_html =~ /([^\-]+) \- (.*)$/
|
480
505
|
duration = _clean_string($1)
|
@@ -519,20 +544,20 @@ edoc
|
|
519
544
|
title = (doc/"div[@id='pvprogtitle']").inner_html.strip
|
520
545
|
|
521
546
|
# parse description
|
522
|
-
font_description = (doc
|
547
|
+
font_description = (doc%"div[@id='description']/font")
|
523
548
|
description = (font_description) ? font_description.all_text.strip : ''
|
524
|
-
span_wholedescr = (doc
|
549
|
+
span_wholedescr = (doc%"span[@id='wholedescr']")
|
525
550
|
if (span_wholedescr)
|
526
551
|
description += ' ' + span_wholedescr.all_text.strip
|
527
552
|
end
|
528
553
|
description = _decode_html(description)
|
529
554
|
|
530
555
|
# parse star count
|
531
|
-
span_rating = (doc
|
556
|
+
span_rating = (doc%"span[@id='communityRating']")
|
532
557
|
star_count = _parse_star_elements(span_rating/"img[@class='star']")
|
533
558
|
|
534
559
|
# parse rating count
|
535
|
-
span_raters = (doc
|
560
|
+
span_raters = (doc%"span[@id='numOfRaters']")
|
536
561
|
rating_count = (span_raters) ? span_raters.inner_html.to_i : 0
|
537
562
|
|
538
563
|
# parse upload user, duration, upload date, upload user domain, upload
|
@@ -542,7 +567,7 @@ edoc
|
|
542
567
|
# rows of text, with only the middle row (in the three-row scenario)
|
543
568
|
# containing duration and upload date, omnipresent. still, we buckle
|
544
569
|
# down and have at it with fervor and tenacity.
|
545
|
-
duration_etc_html = (doc
|
570
|
+
duration_etc_html = (doc%"div[@id='durationetc']").inner_html
|
546
571
|
duration_parts = duration_etc_html.split(/<br[^>]+>/)
|
547
572
|
# see if the first line looks like it has a date formatted ala 'Nov 9, 2006'
|
548
573
|
if (duration_parts[0] =~ /\- [A-Za-z]{3} \d+, \d{4}/)
|
@@ -563,7 +588,7 @@ edoc
|
|
563
588
|
|
564
589
|
# parse the upload user url and domain if present
|
565
590
|
if (upload_user_domain =~ /<a.*?href="([^"]+)"[^>]+>([^<]+)<\/a>/)
|
566
|
-
upload_user_url =
|
591
|
+
upload_user_url = "http://#{@host}#{_decode_html(_clean_string($1))}"
|
567
592
|
upload_user_domain = _clean_string($2)
|
568
593
|
else
|
569
594
|
upload_user_url = ''
|
@@ -571,28 +596,21 @@ edoc
|
|
571
596
|
end
|
572
597
|
|
573
598
|
# pull out view count and rank info table row elements
|
574
|
-
|
575
|
-
|
576
|
-
# parse view count
|
577
|
-
tr_total_views.inner_html =~ /<b>([^<]+)<\/b>/
|
578
|
-
view_count = _human_number_to_int($1)
|
579
|
-
|
580
|
-
# parse yesterday's view count
|
581
|
-
tr_views_yesterday.inner_html =~ /\s+([0-9,]+) yesterday/
|
582
|
-
yesterday_view_count = _human_number_to_int($1)
|
599
|
+
tr_statsall = (doc/"div[@id='statsall']/table/tr")
|
583
600
|
|
584
|
-
#
|
585
|
-
|
586
|
-
rank = _human_number_to_int($1)
|
601
|
+
# remove the first row which just contains header info
|
602
|
+
tr_statsall.shift
|
587
603
|
|
588
|
-
# parse rank
|
589
|
-
(
|
590
|
-
|
604
|
+
# parse each of the view count and rank rows
|
605
|
+
(view_count, rank) = _parse_statsall_row(tr_statsall.shift)
|
606
|
+
(view_count_yesterday, rank_yesterday) = _parse_statsall_row(tr_statsall.shift)
|
607
|
+
(view_count_email, rank_email) = _parse_statsall_row(tr_statsall.shift)
|
608
|
+
(view_count_embed, rank_embed) = _parse_statsall_row(tr_statsall.shift)
|
591
609
|
|
592
610
|
# pull out the url to the video .gvp file if present
|
593
|
-
img_download = (doc
|
611
|
+
img_download = (doc%"img[@src='/static/btn_download.gif']")
|
594
612
|
if (img_download)
|
595
|
-
onclick_html = img_download
|
613
|
+
onclick_html = img_download['onclick']
|
596
614
|
onclick_script = _decode_html(onclick_html)
|
597
615
|
onclick_script =~ /onDownloadClick\(([^\)]+)\)/
|
598
616
|
video_file_url = onclick_script.split(",")[1].gsub(/"/, '')
|
@@ -608,7 +626,7 @@ edoc
|
|
608
626
|
|
609
627
|
# pull out the playlist entries
|
610
628
|
playlist_entries = []
|
611
|
-
table_upnext = (doc
|
629
|
+
table_upnext = (doc%"table[@id='upnexttable']")
|
612
630
|
(table_upnext/"tr").each do |tr_playlist|
|
613
631
|
playlist_entries << _parse_playlist_entry(tr_playlist)
|
614
632
|
end
|
@@ -619,7 +637,9 @@ edoc
|
|
619
637
|
:page_url => url,
|
620
638
|
:playlist_entries => playlist_entries,
|
621
639
|
:rank => rank,
|
622
|
-
:
|
640
|
+
:rank_yesterday => rank_yesterday,
|
641
|
+
:rank_email => rank_email,
|
642
|
+
:rank_embed => rank_embed,
|
623
643
|
:rating_count => rating_count,
|
624
644
|
:star_count => star_count,
|
625
645
|
:title => title,
|
@@ -630,7 +650,9 @@ edoc
|
|
630
650
|
:video_file_url => video_file_url,
|
631
651
|
:video_frame_thumbnails => video_frame_thumbnails,
|
632
652
|
:view_count => view_count,
|
633
|
-
:
|
653
|
+
:view_count_yesterday => view_count_yesterday,
|
654
|
+
:view_count_email => view_count_email,
|
655
|
+
:view_count_embed => view_count_embed)
|
634
656
|
|
635
657
|
# build and return the response
|
636
658
|
VideoDetailsResponse.new(:request_url => url, :video => video)
|
@@ -675,11 +697,11 @@ edoc
|
|
675
697
|
rank_yesterday = td_rank_yesterday.inner_html.to_i
|
676
698
|
|
677
699
|
# parse the video thumbnail image
|
678
|
-
thumbnail_image_url = _decode_html((td_thumbnail
|
700
|
+
thumbnail_image_url = _decode_html((td_thumbnail%"a/img")['src'])
|
679
701
|
|
680
702
|
# parse the detailed video info
|
681
|
-
a_video = (td_detail
|
682
|
-
page_url =
|
703
|
+
a_video = (td_detail%"a")
|
704
|
+
page_url = "http://#{@host}#{a_video['href']}"
|
683
705
|
|
684
706
|
# title
|
685
707
|
title = _decode_html(a_video.inner_html.strip)
|
@@ -688,11 +710,11 @@ edoc
|
|
688
710
|
star_count = _parse_star_elements(td_detail/"div[@class='meta']/span/font/img[@class='star']")
|
689
711
|
|
690
712
|
# rating count
|
691
|
-
span_raters = (td_detail
|
713
|
+
span_raters = (td_detail%"div[@class='meta']/span/font/span[@id='numOfRaters']")
|
692
714
|
rating_count = (span_raters) ? span_raters.inner_html.to_i : 0
|
693
715
|
|
694
716
|
# duration
|
695
|
-
duration = (td_detail
|
717
|
+
duration = (td_detail%"div[@class='meta']").all_text.gsub(/ /, '').strip
|
696
718
|
|
697
719
|
# description
|
698
720
|
description = _decode_html((td_detail).all_text.strip)
|
@@ -718,12 +740,31 @@ edoc
|
|
718
740
|
end
|
719
741
|
|
720
742
|
private
|
743
|
+
|
744
|
+
# Breakout method used by Client#video_details to make things more
|
745
|
+
# manageable, returning a tuple as (view count, rank) for the supplied
|
746
|
+
# data row within the "statsall" div element's statistics table.
|
747
|
+
def _parse_statsall_row (row)
|
748
|
+
td_alltime = row/"td"
|
749
|
+
view_count = _human_number_to_int(_parse_number(td_alltime[1].all_text))
|
750
|
+
rank = _human_number_to_int(_parse_number(td_alltime[2].all_text))
|
751
|
+
[ view_count, rank ]
|
752
|
+
end
|
753
|
+
|
754
|
+
# Given html containing some set of garbage and a human-readable number
|
755
|
+
# somewhere therein, e.g. " 554,200", returns the substring
|
756
|
+
# representing just the number.
|
757
|
+
def _parse_number (html)
|
758
|
+
html =~ /([0-9,\-]+)/
|
759
|
+
$1
|
760
|
+
end
|
761
|
+
|
721
762
|
# Breakout method used by Client#video_details to make things more
|
722
763
|
# manageable, returning a VideoFrameThumbnail constructed from the given
|
723
764
|
# image element.
|
724
765
|
def _parse_video_frame_thumbnail (frame_image)
|
725
|
-
title = frame_image
|
726
|
-
thumbnail_image_url = _decode_html(frame_image
|
766
|
+
title = frame_image['title']
|
767
|
+
thumbnail_image_url = _decode_html(frame_image['src'])
|
727
768
|
VideoFrameThumbnail.new(:title => title, :thumbnail_image_url => thumbnail_image_url)
|
728
769
|
end
|
729
770
|
|
@@ -735,15 +776,15 @@ edoc
|
|
735
776
|
( td_thumbnail, td_playlist ) = (tr_playlist/"td")
|
736
777
|
|
737
778
|
# parse thumbnail image
|
738
|
-
thumbnail_image_url = _decode_html((td_thumbnail
|
779
|
+
thumbnail_image_url = _decode_html((td_thumbnail%"img")['src'])
|
739
780
|
|
740
781
|
# parse the page url and title
|
741
|
-
a_page = (td_playlist
|
742
|
-
page_url =
|
743
|
-
title = _decode_html(a_page
|
782
|
+
a_page = (td_playlist%"a")
|
783
|
+
page_url = "http://#{@host}#{a_page['href']}"
|
784
|
+
title = _decode_html(a_page['title'])
|
744
785
|
|
745
786
|
# parse the upload user and duration
|
746
|
-
meta_html = (td_playlist
|
787
|
+
meta_html = (td_playlist%"span[@class='meta']").inner_html
|
747
788
|
if (meta_html =~ /([^<]+)<br \/>(.*)/)
|
748
789
|
upload_user = _clean_string($1)
|
749
790
|
duration = _clean_string($2)
|
@@ -774,7 +815,7 @@ edoc
|
|
774
815
|
def _parse_star_elements (elements)
|
775
816
|
star_count = 0
|
776
817
|
elements.each do |star|
|
777
|
-
star_src = star
|
818
|
+
star_src = star['src']
|
778
819
|
if (star_src =~ /starLittle\.gif$/)
|
779
820
|
star_count += 1
|
780
821
|
elsif (star_src =~ /starLittleHalf\.gif$/)
|
@@ -793,7 +834,7 @@ edoc
|
|
793
834
|
url = "http://#{@host}/videoranking"
|
794
835
|
if top_request.country
|
795
836
|
country_code = TopVideosRequest::COUNTRIES[top_request.country]
|
796
|
-
url
|
837
|
+
url.concat "?cr=#{country_code}"
|
797
838
|
end
|
798
839
|
url
|
799
840
|
end
|
@@ -801,17 +842,17 @@ edoc
|
|
801
842
|
def _search_url (search_request)
|
802
843
|
query = search_request.query
|
803
844
|
if (search_request.duration && search_request.duration != 'all')
|
804
|
-
query
|
845
|
+
query.concat "+duration:#{search_request.duration}"
|
805
846
|
end
|
806
847
|
|
807
848
|
url = "http://#{@host}/videosearch?q=#{URI.encode(query)}"
|
808
849
|
|
809
850
|
if (search_request.page)
|
810
|
-
url
|
851
|
+
url.concat "&page=#{URI.encode(search_request.page.to_s)}"
|
811
852
|
end
|
812
853
|
|
813
854
|
if (search_request.sort)
|
814
|
-
url
|
855
|
+
url.concat "&so=#{URI.encode(search_request.sort)}"
|
815
856
|
end
|
816
857
|
|
817
858
|
url
|
data/test/test_video_details.rb
CHANGED
@@ -87,10 +87,19 @@ class TestVideoDetails < Test::Unit::TestCase
|
|
87
87
|
|
88
88
|
# poke and prod the video object
|
89
89
|
assert_valid_video video
|
90
|
-
assert (video.view_count >= video.
|
90
|
+
assert (video.view_count >= video.view_count_yesterday)
|
91
|
+
assert (video.view_count >= video.view_count_embed)
|
92
|
+
assert (video.view_count >= video.view_count_email)
|
91
93
|
assert_match /http:\/\/video\.google\.com\/videogvp\/[^\.]+\.gvp\?docid=[0-9\-\+]+/, video.video_file_url
|
92
94
|
assert_kind_of Numeric, video.rank
|
93
|
-
assert_kind_of Numeric, video.
|
95
|
+
assert_kind_of Numeric, video.rank_yesterday
|
96
|
+
assert_kind_of Numeric, video.rank_email
|
97
|
+
assert_kind_of Numeric, video.rank_embed
|
98
|
+
assert_kind_of Numeric, video.view_count
|
99
|
+
assert_kind_of Numeric, video.view_count_yesterday
|
100
|
+
assert_kind_of Numeric, video.view_count_email
|
101
|
+
assert_kind_of Numeric, video.view_count_embed
|
102
|
+
assert_equal video.view_count_yesterday, video.yesterday_view_count
|
94
103
|
assert_instance_of Time, video.upload_date
|
95
104
|
|
96
105
|
# check the video frame thumbnails
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
|
|
3
3
|
specification_version: 1
|
4
4
|
name: google-video
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.
|
7
|
-
date: 2006-11-
|
6
|
+
version: 0.6.0
|
7
|
+
date: 2006-11-16 00:00:00 -08:00
|
8
8
|
summary: A Ruby object-oriented interface to Google Video content.
|
9
9
|
require_paths:
|
10
10
|
- lib
|