google-video 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,24 @@
1
+ * 2006/11/16
2
+
3
+ - [shaper] renamed Video#yesterday_view_count to
4
+ Video#view_count_yesterday for consistency, and aliased to maintain
5
+ backwards-compatibility.
6
+
7
+ - [shaper] Updated to reflect video detail page edits:
8
+ + parse new metrics on email and embed count/rank for today and yesterday
9
+ + correct parsing to accurately grab from new layout of rank and view
10
+ count for today and yesterday
11
+ + turned Video#rank_delta into a method that returns the delta between
12
+ yesterday and today's rank.
13
+
14
+ - [shaper] Updated hpricot usage for brevity: use Element#[] for referencing
15
+ attributes and Element#% for slicing out elements where we know there
16
+ can be only one.
17
+
18
+ - [shaper] Fixed typo in rdoc for VideoSearchResponse.
19
+
20
+ - [shaper] Optimized string building for URLs and the like.
21
+
1
22
  * 2006/11/07
2
23
 
3
24
  - [shaper] Initial version.
data/Rakefile CHANGED
@@ -6,7 +6,7 @@ require 'rake/gempackagetask'
6
6
 
7
7
  spec = Gem::Specification.new do |s|
8
8
  s.name = 'google-video'
9
- s.version = '0.5.0'
9
+ s.version = '0.6.0'
10
10
  s.author = 'Walter Korman'
11
11
  s.email = 'shaper@wgks.org'
12
12
  s.platform = Gem::Platform::RUBY
@@ -64,11 +64,18 @@ module GoogleVideo
64
64
  # only available via a details request: the current rank of this video on
65
65
  # the site.
66
66
  attr_reader :rank
67
-
68
- # only available via a details request: the change in rank this video has
69
- # seen since last ranking: if positive, a move up, if negative, a move
70
- # down.
71
- attr_reader :rank_delta
67
+
68
+ # only available via a details request: the rank of this video on the site
69
+ # yesterday.
70
+ attr_reader :rank_yesterday
71
+
72
+ # only available via a details request: the current rank of this video on
73
+ # the site based on email metrics.
74
+ attr_reader :rank_email
75
+
76
+ # only available via a details request: the current rank of this video on
77
+ # the site based on embedded html metrics.
78
+ attr_reader :rank_embed
72
79
 
73
80
  # the number of ratings this video has received.
74
81
  attr_reader :rating_count
@@ -115,7 +122,18 @@ module GoogleVideo
115
122
 
116
123
  # only available via a details request: the number of views this video
117
124
  # received yesterday.
118
- attr_reader :yesterday_view_count
125
+ attr_reader :view_count_yesterday
126
+
127
+ # same as Video#view_count_yesterday providing backwards-compatibility.
128
+ alias_method :yesterday_view_count, :view_count_yesterday
129
+
130
+ # only available via a details request: the number of views this video
131
+ # has received based on email metrics.
132
+ attr_reader :view_count_email
133
+
134
+ # only available via a details request: the number of views this video
135
+ # has received based on embedded html metrics.
136
+ attr_reader :view_count_embed
119
137
 
120
138
  # the default width in pixels for the video embed html.
121
139
  @@DEFAULT_WIDTH = 400
@@ -144,6 +162,13 @@ module GoogleVideo
144
162
  src="http://video.google.com/googleplayer.swf?docId=#{@doc_id}&hl=en" flashvars=""> </embed>
145
163
  edoc
146
164
  end
165
+
166
+ # only available via a details request: the change in rank this video has
167
+ # seen since yesterday's rank: if positive, a move up, if negative, a move
168
+ # down.
169
+ def rank_delta
170
+ (@rank_yesterday - @rank)
171
+ end
147
172
  end
148
173
 
149
174
  # Provides a miniature snapshot of a point in time within a full video. On
@@ -331,7 +356,7 @@ edoc
331
356
  end
332
357
  end
333
358
 
334
- # Describes a response to a VideoSearchQuery.
359
+ # Describes a response to a VideoSearchRequest.
335
360
  class VideoSearchResponse
336
361
  # the url with which the request was made of the Google Video service.
337
362
  attr_reader :request_url
@@ -439,7 +464,7 @@ edoc
439
464
 
440
465
  # parse the overall search query stats
441
466
  regexp_stats = Regexp.new(/([0-9,]+) \- ([0-9,]+)<\/b> of about <b>([0-9,]+)<\/b> \(<b>([0-9.]+)/)
442
- row = (doc/"#resultsheadertable/tr/td/font").first
467
+ row = (doc%"#resultsheadertable/tr/td/font")
443
468
  if !regexp_stats.match(row.inner_html)
444
469
  raise GoogleVideoException.new("failed to parse search query stats")
445
470
  end
@@ -450,18 +475,18 @@ edoc
450
475
  rows = doc/"table[@class='searchresult']/tr"
451
476
  rows.each do |row|
452
477
  # parse the thumbnail image
453
- thumbnail_image_url = _decode_html((row/"img[@class='searchresultimg']").first.attributes['src'])
478
+ thumbnail_image_url = _decode_html((row%"img[@class='searchresultimg']")['src'])
454
479
 
455
480
  # parse the title and page url
456
- a_title = (row/"div[@class='resulttitle']/a").first
457
- page_url = 'http://' + @host + '/' + _decode_html(a_title.attributes['href'])
481
+ a_title = (row%"div[@class='resulttitle']/a")
482
+ page_url = "http://#{@host}/#{_decode_html(a_title['href'])}"
458
483
  title = _decode_html(a_title.inner_html.strip)
459
484
 
460
485
  # parse the description text
461
- description = _decode_html((row/"div[@class='snippet']").first.inner_html.strip)
486
+ description = _decode_html((row%"div[@class='snippet']").inner_html.strip)
462
487
 
463
488
  # parse the upload username
464
- span_channel = (row/"span[@class='channel']").first
489
+ span_channel = (row%"span[@class='channel']")
465
490
  channel_html = (span_channel) ? span_channel.inner_html : ''
466
491
  channel_html =~ /([^\-]+)/
467
492
  upload_user = _clean_string($1)
@@ -470,11 +495,11 @@ edoc
470
495
  star_count = _parse_star_elements(row/"img[@class='star']")
471
496
 
472
497
  # rating count
473
- span_raters = (row/"span[@id='numOfRaters']").first
498
+ span_raters = (row%"span[@id='numOfRaters']")
474
499
  rating_count = (span_raters) ? span_raters.inner_html.to_i : 0
475
500
 
476
501
  # duration
477
- span_date = (row/"span[@class='date']").first
502
+ span_date = (row%"span[@class='date']")
478
503
  date_html = span_date.inner_html
479
504
  date_html =~ /([^\-]+) \- (.*)$/
480
505
  duration = _clean_string($1)
@@ -519,20 +544,20 @@ edoc
519
544
  title = (doc/"div[@id='pvprogtitle']").inner_html.strip
520
545
 
521
546
  # parse description
522
- font_description = (doc/"div[@id='description']/font").first
547
+ font_description = (doc%"div[@id='description']/font")
523
548
  description = (font_description) ? font_description.all_text.strip : ''
524
- span_wholedescr = (doc/"span[@id='wholedescr']").first
549
+ span_wholedescr = (doc%"span[@id='wholedescr']")
525
550
  if (span_wholedescr)
526
551
  description += ' ' + span_wholedescr.all_text.strip
527
552
  end
528
553
  description = _decode_html(description)
529
554
 
530
555
  # parse star count
531
- span_rating = (doc/"span[@id='communityRating']").first
556
+ span_rating = (doc%"span[@id='communityRating']")
532
557
  star_count = _parse_star_elements(span_rating/"img[@class='star']")
533
558
 
534
559
  # parse rating count
535
- span_raters = (doc/"span[@id='numOfRaters']").first
560
+ span_raters = (doc%"span[@id='numOfRaters']")
536
561
  rating_count = (span_raters) ? span_raters.inner_html.to_i : 0
537
562
 
538
563
  # parse upload user, duration, upload date, upload user domain, upload
@@ -542,7 +567,7 @@ edoc
542
567
  # rows of text, with only the middle row (in the three-row scenario)
543
568
  # containing duration and upload date, omnipresent. still, we buckle
544
569
  # down and have at it with fervor and tenacity.
545
- duration_etc_html = (doc/"div[@id='durationetc']").first.inner_html
570
+ duration_etc_html = (doc%"div[@id='durationetc']").inner_html
546
571
  duration_parts = duration_etc_html.split(/<br[^>]+>/)
547
572
  # see if the first line looks like it has a date formatted ala 'Nov 9, 2006'
548
573
  if (duration_parts[0] =~ /\- [A-Za-z]{3} \d+, \d{4}/)
@@ -563,7 +588,7 @@ edoc
563
588
 
564
589
  # parse the upload user url and domain if present
565
590
  if (upload_user_domain =~ /<a.*?href="([^"]+)"[^>]+>([^<]+)<\/a>/)
566
- upload_user_url = 'http://' + @host + _decode_html(_clean_string($1))
591
+ upload_user_url = "http://#{@host}#{_decode_html(_clean_string($1))}"
567
592
  upload_user_domain = _clean_string($2)
568
593
  else
569
594
  upload_user_url = ''
@@ -571,28 +596,21 @@ edoc
571
596
  end
572
597
 
573
598
  # pull out view count and rank info table row elements
574
- ( tr_total_views, tr_views_yesterday, tr_rank ) = (doc/"table[@id='statsall']/tr")
575
-
576
- # parse view count
577
- tr_total_views.inner_html =~ /<b>([^<]+)<\/b>/
578
- view_count = _human_number_to_int($1)
579
-
580
- # parse yesterday's view count
581
- tr_views_yesterday.inner_html =~ /\s+([0-9,]+) yesterday/
582
- yesterday_view_count = _human_number_to_int($1)
599
+ tr_statsall = (doc/"div[@id='statsall']/table/tr")
583
600
 
584
- # parse rank
585
- tr_rank.inner_html =~ /rank ([0-9,]+)/
586
- rank = _human_number_to_int($1)
601
+ # remove the first row which just contains header info
602
+ tr_statsall.shift
587
603
 
588
- # parse rank delta
589
- (tr_rank/"span").first.inner_html =~ /\(([0-9\+\-,]+)\)/
590
- rank_delta = _human_number_to_int($1)
604
+ # parse each of the view count and rank rows
605
+ (view_count, rank) = _parse_statsall_row(tr_statsall.shift)
606
+ (view_count_yesterday, rank_yesterday) = _parse_statsall_row(tr_statsall.shift)
607
+ (view_count_email, rank_email) = _parse_statsall_row(tr_statsall.shift)
608
+ (view_count_embed, rank_embed) = _parse_statsall_row(tr_statsall.shift)
591
609
 
592
610
  # pull out the url to the video .gvp file if present
593
- img_download = (doc/"img[@src='/static/btn_download.gif']").first
611
+ img_download = (doc%"img[@src='/static/btn_download.gif']")
594
612
  if (img_download)
595
- onclick_html = img_download.attributes['onclick']
613
+ onclick_html = img_download['onclick']
596
614
  onclick_script = _decode_html(onclick_html)
597
615
  onclick_script =~ /onDownloadClick\(([^\)]+)\)/
598
616
  video_file_url = onclick_script.split(",")[1].gsub(/"/, '')
@@ -608,7 +626,7 @@ edoc
608
626
 
609
627
  # pull out the playlist entries
610
628
  playlist_entries = []
611
- table_upnext = (doc/"table[@id='upnexttable']").first
629
+ table_upnext = (doc%"table[@id='upnexttable']")
612
630
  (table_upnext/"tr").each do |tr_playlist|
613
631
  playlist_entries << _parse_playlist_entry(tr_playlist)
614
632
  end
@@ -619,7 +637,9 @@ edoc
619
637
  :page_url => url,
620
638
  :playlist_entries => playlist_entries,
621
639
  :rank => rank,
622
- :rank_delta => rank_delta,
640
+ :rank_yesterday => rank_yesterday,
641
+ :rank_email => rank_email,
642
+ :rank_embed => rank_embed,
623
643
  :rating_count => rating_count,
624
644
  :star_count => star_count,
625
645
  :title => title,
@@ -630,7 +650,9 @@ edoc
630
650
  :video_file_url => video_file_url,
631
651
  :video_frame_thumbnails => video_frame_thumbnails,
632
652
  :view_count => view_count,
633
- :yesterday_view_count => yesterday_view_count)
653
+ :view_count_yesterday => view_count_yesterday,
654
+ :view_count_email => view_count_email,
655
+ :view_count_embed => view_count_embed)
634
656
 
635
657
  # build and return the response
636
658
  VideoDetailsResponse.new(:request_url => url, :video => video)
@@ -675,11 +697,11 @@ edoc
675
697
  rank_yesterday = td_rank_yesterday.inner_html.to_i
676
698
 
677
699
  # parse the video thumbnail image
678
- thumbnail_image_url = _decode_html((td_thumbnail/"a/img").first.attributes['src'])
700
+ thumbnail_image_url = _decode_html((td_thumbnail%"a/img")['src'])
679
701
 
680
702
  # parse the detailed video info
681
- a_video = (td_detail/"a").first
682
- page_url = 'http://' + @host + a_video.attributes['href']
703
+ a_video = (td_detail%"a")
704
+ page_url = "http://#{@host}#{a_video['href']}"
683
705
 
684
706
  # title
685
707
  title = _decode_html(a_video.inner_html.strip)
@@ -688,11 +710,11 @@ edoc
688
710
  star_count = _parse_star_elements(td_detail/"div[@class='meta']/span/font/img[@class='star']")
689
711
 
690
712
  # rating count
691
- span_raters = (td_detail/"div[@class='meta']/span/font/span[@id='numOfRaters']").first
713
+ span_raters = (td_detail%"div[@class='meta']/span/font/span[@id='numOfRaters']")
692
714
  rating_count = (span_raters) ? span_raters.inner_html.to_i : 0
693
715
 
694
716
  # duration
695
- duration = (td_detail/"div[@class='meta']").first.all_text.gsub(/&nbsp;/, '').strip
717
+ duration = (td_detail%"div[@class='meta']").all_text.gsub(/&nbsp;/, '').strip
696
718
 
697
719
  # description
698
720
  description = _decode_html((td_detail).all_text.strip)
@@ -718,12 +740,31 @@ edoc
718
740
  end
719
741
 
720
742
  private
743
+
744
+ # Breakout method used by Client#video_details to make things more
745
+ # manageable, returning a tuple as (view count, rank) for the supplied
746
+ # data row within the "statsall" div element's statistics table.
747
+ def _parse_statsall_row (row)
748
+ td_alltime = row/"td"
749
+ view_count = _human_number_to_int(_parse_number(td_alltime[1].all_text))
750
+ rank = _human_number_to_int(_parse_number(td_alltime[2].all_text))
751
+ [ view_count, rank ]
752
+ end
753
+
754
+ # Given html containing some set of garbage and a human-readable number
755
+ # somewhere therein, e.g. "&nbsp; 554,200", returns just the number's
756
+ # text as a string (e.g. "554,200"), or nil when no number is found.
757
+ def _parse_number (html)
758
+ html =~ /([0-9,\-]+)/
759
+ $1
760
+ end
761
+
721
762
  # Breakout method used by Client#video_details to make things more
722
763
  # manageable, returning a VideoFrameThumbnail constructed from the given
723
764
  # image element.
724
765
  def _parse_video_frame_thumbnail (frame_image)
725
- title = frame_image.attributes['title']
726
- thumbnail_image_url = _decode_html(frame_image.attributes['src'])
766
+ title = frame_image['title']
767
+ thumbnail_image_url = _decode_html(frame_image['src'])
727
768
  VideoFrameThumbnail.new(:title => title, :thumbnail_image_url => thumbnail_image_url)
728
769
  end
729
770
 
@@ -735,15 +776,15 @@ edoc
735
776
  ( td_thumbnail, td_playlist ) = (tr_playlist/"td")
736
777
 
737
778
  # parse thumbnail image
738
- thumbnail_image_url = _decode_html((td_thumbnail/"img").first.attributes['src'])
779
+ thumbnail_image_url = _decode_html((td_thumbnail%"img")['src'])
739
780
 
740
781
  # parse the page url and title
741
- a_page = (td_playlist/"a").first
742
- page_url = 'http://' + @host + a_page.attributes['href']
743
- title = _decode_html(a_page.attributes['title'])
782
+ a_page = (td_playlist%"a")
783
+ page_url = "http://#{@host}#{a_page['href']}"
784
+ title = _decode_html(a_page['title'])
744
785
 
745
786
  # parse the upload user and duration
746
- meta_html = (td_playlist/"span[@class='meta']").first.inner_html
787
+ meta_html = (td_playlist%"span[@class='meta']").inner_html
747
788
  if (meta_html =~ /([^<]+)<br \/>(.*)/)
748
789
  upload_user = _clean_string($1)
749
790
  duration = _clean_string($2)
@@ -774,7 +815,7 @@ edoc
774
815
  def _parse_star_elements (elements)
775
816
  star_count = 0
776
817
  elements.each do |star|
777
- star_src = star.attributes['src']
818
+ star_src = star['src']
778
819
  if (star_src =~ /starLittle\.gif$/)
779
820
  star_count += 1
780
821
  elsif (star_src =~ /starLittleHalf\.gif$/)
@@ -793,7 +834,7 @@ edoc
793
834
  url = "http://#{@host}/videoranking"
794
835
  if top_request.country
795
836
  country_code = TopVideosRequest::COUNTRIES[top_request.country]
796
- url += "?cr=#{country_code}"
837
+ url.concat "?cr=#{country_code}"
797
838
  end
798
839
  url
799
840
  end
@@ -801,17 +842,17 @@ edoc
801
842
  def _search_url (search_request)
802
843
  query = search_request.query
803
844
  if (search_request.duration && search_request.duration != 'all')
804
- query += '+duration:' + search_request.duration
845
+ query.concat "+duration:#{search_request.duration}"
805
846
  end
806
847
 
807
848
  url = "http://#{@host}/videosearch?q=#{URI.encode(query)}"
808
849
 
809
850
  if (search_request.page)
810
- url += '&page=' + URI.encode(search_request.page.to_s)
851
+ url.concat "&page=#{URI.encode(search_request.page.to_s)}"
811
852
  end
812
853
 
813
854
  if (search_request.sort)
814
- url += '&so=' + URI.encode(search_request.sort)
855
+ url.concat "&so=#{URI.encode(search_request.sort)}"
815
856
  end
816
857
 
817
858
  url
@@ -87,10 +87,19 @@ class TestVideoDetails < Test::Unit::TestCase
87
87
 
88
88
  # poke and prod the video object
89
89
  assert_valid_video video
90
- assert (video.view_count >= video.yesterday_view_count)
90
+ assert (video.view_count >= video.view_count_yesterday)
91
+ assert (video.view_count >= video.view_count_embed)
92
+ assert (video.view_count >= video.view_count_email)
91
93
  assert_match /http:\/\/video\.google\.com\/videogvp\/[^\.]+\.gvp\?docid=[0-9\-\+]+/, video.video_file_url
92
94
  assert_kind_of Numeric, video.rank
93
- assert_kind_of Numeric, video.rank_delta
95
+ assert_kind_of Numeric, video.rank_yesterday
96
+ assert_kind_of Numeric, video.rank_email
97
+ assert_kind_of Numeric, video.rank_embed
98
+ assert_kind_of Numeric, video.view_count
99
+ assert_kind_of Numeric, video.view_count_yesterday
100
+ assert_kind_of Numeric, video.view_count_email
101
+ assert_kind_of Numeric, video.view_count_embed
102
+ assert_equal video.view_count_yesterday, video.yesterday_view_count
94
103
  assert_instance_of Time, video.upload_date
95
104
 
96
105
  # check the video frame thumbnails
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: google-video
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.5.0
7
- date: 2006-11-09 00:00:00 -08:00
6
+ version: 0.6.0
7
+ date: 2006-11-16 00:00:00 -08:00
8
8
  summary: A Ruby object-oriented interface to Google Video content.
9
9
  require_paths:
10
10
  - lib