google-video 0.5.0 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,24 @@
1
+ * 2006/11/16
2
+
3
+ - [shaper] renamed Video#yesterday_view_count to
4
+ Video#view_count_yesterday for consistency, and aliased to maintain
5
+ backwards-compatibility.
6
+
7
+ - [shaper] Updated to reflect video detail page edits:
8
+ + parse new metrics on email and embed count/rank for today and yesterday
9
+ + correct parsing to accurately grab from new layout of rank and view
10
+ count for today and yesterday
11
+ + turned Video#rank_delta into a method that returns the delta between
12
+ yesterday's and today's rank.
13
+
14
+ - [shaper] Updated hpricot usage for brevity: use Element#[] for referencing
15
+ attributes and Element#% for slicing out elements where we know there
16
+ can be only one.
17
+
18
+ - [shaper] Fixed typo in rdoc for VideoSearchResponse.
19
+
20
+ - [shaper] Optimized string building for URLs and the like.
21
+
1
22
  * 2006/11/07
2
23
 
3
24
  - [shaper] Initial version.
data/Rakefile CHANGED
@@ -6,7 +6,7 @@ require 'rake/gempackagetask'
6
6
 
7
7
  spec = Gem::Specification.new do |s|
8
8
  s.name = 'google-video'
9
- s.version = '0.5.0'
9
+ s.version = '0.6.0'
10
10
  s.author = 'Walter Korman'
11
11
  s.email = 'shaper@wgks.org'
12
12
  s.platform = Gem::Platform::RUBY
@@ -64,11 +64,18 @@ module GoogleVideo
64
64
  # only available via a details request: the current rank of this video on
65
65
  # the site.
66
66
  attr_reader :rank
67
-
68
- # only available via a details request: the change in rank this video has
69
- # seen since last ranking: if positive, a move up, if negative, a move
70
- # down.
71
- attr_reader :rank_delta
67
+
68
+ # only available via a details request: the rank of this video on the site
69
+ # yesterday.
70
+ attr_reader :rank_yesterday
71
+
72
+ # only available via a details request: the current rank of this video on
73
+ # the site based on email metrics.
74
+ attr_reader :rank_email
75
+
76
+ # only available via a details request: the current rank of this video on
77
+ # the site based on embedded html metrics.
78
+ attr_reader :rank_embed
72
79
 
73
80
  # the number of ratings this video has received.
74
81
  attr_reader :rating_count
@@ -115,7 +122,18 @@ module GoogleVideo
115
122
 
116
123
  # only available via a details request: the number of views this video
117
124
  # received yesterday.
118
- attr_reader :yesterday_view_count
125
+ attr_reader :view_count_yesterday
126
+
127
+ # same as Video#view_count_yesterday providing backwards-compatibility.
128
+ alias_method :yesterday_view_count, :view_count_yesterday
129
+
130
+ # only available via a details request: the number of views this video
131
+ # has received based on email metrics.
132
+ attr_reader :view_count_email
133
+
134
+ # only available via a details request: the number of views this video
135
+ # has received based on embedded html metrics.
136
+ attr_reader :view_count_embed
119
137
 
120
138
  # the default width in pixels for the video embed html.
121
139
  @@DEFAULT_WIDTH = 400
@@ -144,6 +162,13 @@ module GoogleVideo
144
162
  src="http://video.google.com/googleplayer.swf?docId=#{@doc_id}&hl=en" flashvars=""> </embed>
145
163
  edoc
146
164
  end
165
+
166
+ # only available via a details request: the change in rank this video has
167
+ # seen since yesterday's rank: if positive, a move up, if negative, a move
168
+ # down.
169
+ def rank_delta
170
+ (@rank_yesterday - @rank)
171
+ end
147
172
  end
148
173
 
149
174
  # Provides a miniature snapshot of a point in time within a full video. On
@@ -331,7 +356,7 @@ edoc
331
356
  end
332
357
  end
333
358
 
334
- # Describes a response to a VideoSearchQuery.
359
+ # Describes a response to a VideoSearchRequest.
335
360
  class VideoSearchResponse
336
361
  # the url with which the request was made of the Google Video service.
337
362
  attr_reader :request_url
@@ -439,7 +464,7 @@ edoc
439
464
 
440
465
  # parse the overall search query stats
441
466
  regexp_stats = Regexp.new(/([0-9,]+) \- ([0-9,]+)<\/b> of about <b>([0-9,]+)<\/b> \(<b>([0-9.]+)/)
442
- row = (doc/"#resultsheadertable/tr/td/font").first
467
+ row = (doc%"#resultsheadertable/tr/td/font")
443
468
  if !regexp_stats.match(row.inner_html)
444
469
  raise GoogleVideoException.new("failed to parse search query stats")
445
470
  end
@@ -450,18 +475,18 @@ edoc
450
475
  rows = doc/"table[@class='searchresult']/tr"
451
476
  rows.each do |row|
452
477
  # parse the thumbnail image
453
- thumbnail_image_url = _decode_html((row/"img[@class='searchresultimg']").first.attributes['src'])
478
+ thumbnail_image_url = _decode_html((row%"img[@class='searchresultimg']")['src'])
454
479
 
455
480
  # parse the title and page url
456
- a_title = (row/"div[@class='resulttitle']/a").first
457
- page_url = 'http://' + @host + '/' + _decode_html(a_title.attributes['href'])
481
+ a_title = (row%"div[@class='resulttitle']/a")
482
+ page_url = "http://#{@host}/#{_decode_html(a_title['href'])}"
458
483
  title = _decode_html(a_title.inner_html.strip)
459
484
 
460
485
  # parse the description text
461
- description = _decode_html((row/"div[@class='snippet']").first.inner_html.strip)
486
+ description = _decode_html((row%"div[@class='snippet']").inner_html.strip)
462
487
 
463
488
  # parse the upload username
464
- span_channel = (row/"span[@class='channel']").first
489
+ span_channel = (row%"span[@class='channel']")
465
490
  channel_html = (span_channel) ? span_channel.inner_html : ''
466
491
  channel_html =~ /([^\-]+)/
467
492
  upload_user = _clean_string($1)
@@ -470,11 +495,11 @@ edoc
470
495
  star_count = _parse_star_elements(row/"img[@class='star']")
471
496
 
472
497
  # rating count
473
- span_raters = (row/"span[@id='numOfRaters']").first
498
+ span_raters = (row%"span[@id='numOfRaters']")
474
499
  rating_count = (span_raters) ? span_raters.inner_html.to_i : 0
475
500
 
476
501
  # duration
477
- span_date = (row/"span[@class='date']").first
502
+ span_date = (row%"span[@class='date']")
478
503
  date_html = span_date.inner_html
479
504
  date_html =~ /([^\-]+) \- (.*)$/
480
505
  duration = _clean_string($1)
@@ -519,20 +544,20 @@ edoc
519
544
  title = (doc/"div[@id='pvprogtitle']").inner_html.strip
520
545
 
521
546
  # parse description
522
- font_description = (doc/"div[@id='description']/font").first
547
+ font_description = (doc%"div[@id='description']/font")
523
548
  description = (font_description) ? font_description.all_text.strip : ''
524
- span_wholedescr = (doc/"span[@id='wholedescr']").first
549
+ span_wholedescr = (doc%"span[@id='wholedescr']")
525
550
  if (span_wholedescr)
526
551
  description += ' ' + span_wholedescr.all_text.strip
527
552
  end
528
553
  description = _decode_html(description)
529
554
 
530
555
  # parse star count
531
- span_rating = (doc/"span[@id='communityRating']").first
556
+ span_rating = (doc%"span[@id='communityRating']")
532
557
  star_count = _parse_star_elements(span_rating/"img[@class='star']")
533
558
 
534
559
  # parse rating count
535
- span_raters = (doc/"span[@id='numOfRaters']").first
560
+ span_raters = (doc%"span[@id='numOfRaters']")
536
561
  rating_count = (span_raters) ? span_raters.inner_html.to_i : 0
537
562
 
538
563
  # parse upload user, duration, upload date, upload user domain, upload
@@ -542,7 +567,7 @@ edoc
542
567
  # rows of text, with only the middle row (in the three-row scenario)
543
568
  # containing duration and upload date, omnipresent. still, we buckle
544
569
  # down and have at it with fervor and tenacity.
545
- duration_etc_html = (doc/"div[@id='durationetc']").first.inner_html
570
+ duration_etc_html = (doc%"div[@id='durationetc']").inner_html
546
571
  duration_parts = duration_etc_html.split(/<br[^>]+>/)
547
572
  # see if the first line looks like it has a date formatted ala 'Nov 9, 2006'
548
573
  if (duration_parts[0] =~ /\- [A-Za-z]{3} \d+, \d{4}/)
@@ -563,7 +588,7 @@ edoc
563
588
 
564
589
  # parse the upload user url and domain if present
565
590
  if (upload_user_domain =~ /<a.*?href="([^"]+)"[^>]+>([^<]+)<\/a>/)
566
- upload_user_url = 'http://' + @host + _decode_html(_clean_string($1))
591
+ upload_user_url = "http://#{@host}#{_decode_html(_clean_string($1))}"
567
592
  upload_user_domain = _clean_string($2)
568
593
  else
569
594
  upload_user_url = ''
@@ -571,28 +596,21 @@ edoc
571
596
  end
572
597
 
573
598
  # pull out view count and rank info table row elements
574
- ( tr_total_views, tr_views_yesterday, tr_rank ) = (doc/"table[@id='statsall']/tr")
575
-
576
- # parse view count
577
- tr_total_views.inner_html =~ /<b>([^<]+)<\/b>/
578
- view_count = _human_number_to_int($1)
579
-
580
- # parse yesterday's view count
581
- tr_views_yesterday.inner_html =~ /\s+([0-9,]+) yesterday/
582
- yesterday_view_count = _human_number_to_int($1)
599
+ tr_statsall = (doc/"div[@id='statsall']/table/tr")
583
600
 
584
- # parse rank
585
- tr_rank.inner_html =~ /rank ([0-9,]+)/
586
- rank = _human_number_to_int($1)
601
+ # remove the first row which just contains header info
602
+ tr_statsall.shift
587
603
 
588
- # parse rank delta
589
- (tr_rank/"span").first.inner_html =~ /\(([0-9\+\-,]+)\)/
590
- rank_delta = _human_number_to_int($1)
604
+ # parse each of the view count and rank rows
605
+ (view_count, rank) = _parse_statsall_row(tr_statsall.shift)
606
+ (view_count_yesterday, rank_yesterday) = _parse_statsall_row(tr_statsall.shift)
607
+ (view_count_email, rank_email) = _parse_statsall_row(tr_statsall.shift)
608
+ (view_count_embed, rank_embed) = _parse_statsall_row(tr_statsall.shift)
591
609
 
592
610
  # pull out the url to the video .gvp file if present
593
- img_download = (doc/"img[@src='/static/btn_download.gif']").first
611
+ img_download = (doc%"img[@src='/static/btn_download.gif']")
594
612
  if (img_download)
595
- onclick_html = img_download.attributes['onclick']
613
+ onclick_html = img_download['onclick']
596
614
  onclick_script = _decode_html(onclick_html)
597
615
  onclick_script =~ /onDownloadClick\(([^\)]+)\)/
598
616
  video_file_url = onclick_script.split(",")[1].gsub(/"/, '')
@@ -608,7 +626,7 @@ edoc
608
626
 
609
627
  # pull out the playlist entries
610
628
  playlist_entries = []
611
- table_upnext = (doc/"table[@id='upnexttable']").first
629
+ table_upnext = (doc%"table[@id='upnexttable']")
612
630
  (table_upnext/"tr").each do |tr_playlist|
613
631
  playlist_entries << _parse_playlist_entry(tr_playlist)
614
632
  end
@@ -619,7 +637,9 @@ edoc
619
637
  :page_url => url,
620
638
  :playlist_entries => playlist_entries,
621
639
  :rank => rank,
622
- :rank_delta => rank_delta,
640
+ :rank_yesterday => rank_yesterday,
641
+ :rank_email => rank_email,
642
+ :rank_embed => rank_embed,
623
643
  :rating_count => rating_count,
624
644
  :star_count => star_count,
625
645
  :title => title,
@@ -630,7 +650,9 @@ edoc
630
650
  :video_file_url => video_file_url,
631
651
  :video_frame_thumbnails => video_frame_thumbnails,
632
652
  :view_count => view_count,
633
- :yesterday_view_count => yesterday_view_count)
653
+ :view_count_yesterday => view_count_yesterday,
654
+ :view_count_email => view_count_email,
655
+ :view_count_embed => view_count_embed)
634
656
 
635
657
  # build and return the response
636
658
  VideoDetailsResponse.new(:request_url => url, :video => video)
@@ -675,11 +697,11 @@ edoc
675
697
  rank_yesterday = td_rank_yesterday.inner_html.to_i
676
698
 
677
699
  # parse the video thumbnail image
678
- thumbnail_image_url = _decode_html((td_thumbnail/"a/img").first.attributes['src'])
700
+ thumbnail_image_url = _decode_html((td_thumbnail%"a/img")['src'])
679
701
 
680
702
  # parse the detailed video info
681
- a_video = (td_detail/"a").first
682
- page_url = 'http://' + @host + a_video.attributes['href']
703
+ a_video = (td_detail%"a")
704
+ page_url = "http://#{@host}#{a_video['href']}"
683
705
 
684
706
  # title
685
707
  title = _decode_html(a_video.inner_html.strip)
@@ -688,11 +710,11 @@ edoc
688
710
  star_count = _parse_star_elements(td_detail/"div[@class='meta']/span/font/img[@class='star']")
689
711
 
690
712
  # rating count
691
- span_raters = (td_detail/"div[@class='meta']/span/font/span[@id='numOfRaters']").first
713
+ span_raters = (td_detail%"div[@class='meta']/span/font/span[@id='numOfRaters']")
692
714
  rating_count = (span_raters) ? span_raters.inner_html.to_i : 0
693
715
 
694
716
  # duration
695
- duration = (td_detail/"div[@class='meta']").first.all_text.gsub(/&nbsp;/, '').strip
717
+ duration = (td_detail%"div[@class='meta']").all_text.gsub(/&nbsp;/, '').strip
696
718
 
697
719
  # description
698
720
  description = _decode_html((td_detail).all_text.strip)
@@ -718,12 +740,31 @@ edoc
718
740
  end
719
741
 
720
742
  private
743
+
744
+ # Breakout method used by Client#video_details to make things more
745
+ # manageable, returning a tuple as (view count, rank) for the supplied
746
+ # data row within the "statsall" div element's statistics table.
747
+ def _parse_statsall_row (row)
748
+ td_alltime = row/"td"
749
+ view_count = _human_number_to_int(_parse_number(td_alltime[1].all_text))
750
+ rank = _human_number_to_int(_parse_number(td_alltime[2].all_text))
751
+ [ view_count, rank ]
752
+ end
753
+
754
+ # Given html containing some set of garbage and a human-readable number
755
+ # somewhere therein, e.g. "&nbsp; 554,200", returns a string containing
756
+ # just the number (e.g. "554,200"), or nil if no number is found.
757
+ def _parse_number (html)
758
+ html =~ /([0-9,\-]+)/
759
+ $1
760
+ end
761
+
721
762
  # Breakout method used by Client#video_details to make things more
722
763
  # manageable, returning a VideoFrameThumbnail constructed from the given
723
764
  # image element.
724
765
  def _parse_video_frame_thumbnail (frame_image)
725
- title = frame_image.attributes['title']
726
- thumbnail_image_url = _decode_html(frame_image.attributes['src'])
766
+ title = frame_image['title']
767
+ thumbnail_image_url = _decode_html(frame_image['src'])
727
768
  VideoFrameThumbnail.new(:title => title, :thumbnail_image_url => thumbnail_image_url)
728
769
  end
729
770
 
@@ -735,15 +776,15 @@ edoc
735
776
  ( td_thumbnail, td_playlist ) = (tr_playlist/"td")
736
777
 
737
778
  # parse thumbnail image
738
- thumbnail_image_url = _decode_html((td_thumbnail/"img").first.attributes['src'])
779
+ thumbnail_image_url = _decode_html((td_thumbnail%"img")['src'])
739
780
 
740
781
  # parse the page url and title
741
- a_page = (td_playlist/"a").first
742
- page_url = 'http://' + @host + a_page.attributes['href']
743
- title = _decode_html(a_page.attributes['title'])
782
+ a_page = (td_playlist%"a")
783
+ page_url = "http://#{@host}#{a_page['href']}"
784
+ title = _decode_html(a_page['title'])
744
785
 
745
786
  # parse the upload user and duration
746
- meta_html = (td_playlist/"span[@class='meta']").first.inner_html
787
+ meta_html = (td_playlist%"span[@class='meta']").inner_html
747
788
  if (meta_html =~ /([^<]+)<br \/>(.*)/)
748
789
  upload_user = _clean_string($1)
749
790
  duration = _clean_string($2)
@@ -774,7 +815,7 @@ edoc
774
815
  def _parse_star_elements (elements)
775
816
  star_count = 0
776
817
  elements.each do |star|
777
- star_src = star.attributes['src']
818
+ star_src = star['src']
778
819
  if (star_src =~ /starLittle\.gif$/)
779
820
  star_count += 1
780
821
  elsif (star_src =~ /starLittleHalf\.gif$/)
@@ -793,7 +834,7 @@ edoc
793
834
  url = "http://#{@host}/videoranking"
794
835
  if top_request.country
795
836
  country_code = TopVideosRequest::COUNTRIES[top_request.country]
796
- url += "?cr=#{country_code}"
837
+ url.concat "?cr=#{country_code}"
797
838
  end
798
839
  url
799
840
  end
@@ -801,17 +842,17 @@ edoc
801
842
  def _search_url (search_request)
802
843
  query = search_request.query
803
844
  if (search_request.duration && search_request.duration != 'all')
804
- query += '+duration:' + search_request.duration
845
+ query.concat "+duration:#{search_request.duration}"
805
846
  end
806
847
 
807
848
  url = "http://#{@host}/videosearch?q=#{URI.encode(query)}"
808
849
 
809
850
  if (search_request.page)
810
- url += '&page=' + URI.encode(search_request.page.to_s)
851
+ url.concat "&page=#{URI.encode(search_request.page.to_s)}"
811
852
  end
812
853
 
813
854
  if (search_request.sort)
814
- url += '&so=' + URI.encode(search_request.sort)
855
+ url.concat "&so=#{URI.encode(search_request.sort)}"
815
856
  end
816
857
 
817
858
  url
@@ -87,10 +87,19 @@ class TestVideoDetails < Test::Unit::TestCase
87
87
 
88
88
  # poke and prod the video object
89
89
  assert_valid_video video
90
- assert (video.view_count >= video.yesterday_view_count)
90
+ assert (video.view_count >= video.view_count_yesterday)
91
+ assert (video.view_count >= video.view_count_embed)
92
+ assert (video.view_count >= video.view_count_email)
91
93
  assert_match /http:\/\/video\.google\.com\/videogvp\/[^\.]+\.gvp\?docid=[0-9\-\+]+/, video.video_file_url
92
94
  assert_kind_of Numeric, video.rank
93
- assert_kind_of Numeric, video.rank_delta
95
+ assert_kind_of Numeric, video.rank_yesterday
96
+ assert_kind_of Numeric, video.rank_email
97
+ assert_kind_of Numeric, video.rank_embed
98
+ assert_kind_of Numeric, video.view_count
99
+ assert_kind_of Numeric, video.view_count_yesterday
100
+ assert_kind_of Numeric, video.view_count_email
101
+ assert_kind_of Numeric, video.view_count_embed
102
+ assert_equal video.view_count_yesterday, video.yesterday_view_count
94
103
  assert_instance_of Time, video.upload_date
95
104
 
96
105
  # check the video frame thumbnails
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: google-video
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.5.0
7
- date: 2006-11-09 00:00:00 -08:00
6
+ version: 0.6.0
7
+ date: 2006-11-16 00:00:00 -08:00
8
8
  summary: A Ruby object-oriented interface to Google Video content.
9
9
  require_paths:
10
10
  - lib