tv-dot-com 0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +5 -0
- data/Manifest.txt +7 -0
- data/README.txt +33 -0
- data/Rakefile +15 -0
- data/bin/tv_dot_com +0 -0
- data/lib/tv_dot_com.rb +140 -0
- data/test/test_tv_dot_com.rb +0 -0
- metadata +60 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
data/README.txt
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
TvDotCom
|
2
|
+
by Doug Alcorn <doug@lathi.net>
|
3
|
+
http://tv-dot-com.rubyforge.org/
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
TV Show and Episode classes built from data scraped from TV.com and epguides.com, useful for fetching metadata on TV shows.
|
8
|
+
|
9
|
+
== FEATURES/PROBLEMS:
|
10
|
+
|
11
|
+
* Show names must be given in the same format that epguides.com uses in the path portion of its URLs
|
12
|
+
* Doesn't work well for shows that aren't listed on epguides.com
|
13
|
+
* Only as good as the data on epguides.com and tv.com
|
14
|
+
|
15
|
+
== SYNOPSIS:
|
16
|
+
|
17
|
+
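  require 'tv_dot_com'

  show = TvDotCom::Show.new('ShowName')   # a show name, or a full tv.com URL
  episode = show.find_episode_by_season_and_number(1, 2)
  puts episode.title
  puts episode.first_aired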
|
18
|
+
|
19
|
+
== REQUIREMENTS:
|
20
|
+
|
21
|
+
* Hpricot
|
22
|
+
|
23
|
+
== INSTALL:
|
24
|
+
|
25
|
+
* sudo gem install tv_dot_com
|
26
|
+
|
27
|
+
== LICENSE:
|
28
|
+
|
29
|
+
(The Ruby License)
|
30
|
+
|
31
|
+
Copyright (c) 2007 Doug Alcorn <doug@lathi.net>
|
32
|
+
|
33
|
+
See LICENSE.txt or COPYING.txt
|
data/Rakefile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'hoe'
|
5
|
+
require './lib/tv_dot_com.rb'
|
6
|
+
|
7
|
+
# Gem specification and packaging tasks, generated by Hoe.
#
# Author, email, url and description are set explicitly: when they are left
# unset Hoe fills in its own placeholder values, which then end up in the
# published gem metadata.
Hoe.new('tv-dot-com', TvDotCom::VERSION) do |hoe|
  hoe.rubyforge_name = 'tv-dot-com'
  hoe.author = 'Doug Alcorn'
  hoe.email = 'doug@lathi.net'
  hoe.url = 'http://tv-dot-com.rubyforge.org/'
  hoe.summary = 'TV Show and Episode classes scraped from TV.com and Epguide.com useful for fetching meta data on TV Shows.'
  hoe.description = hoe.summary
  hoe.changes = hoe.paragraphs_of('History.txt', 0..1).join("\n\n")
  # lib/tv_dot_com.rb requires hpricot at load time, so installing the gem
  # must pull it in.
  # NOTE(review): no minimum hpricot version is known from this file; pin one
  # once it has been confirmed.
  hoe.extra_deps << ['hpricot']
end
|
14
|
+
|
15
|
+
# vim: syntax=Ruby
|
data/bin/tv_dot_com
ADDED
File without changes
|
data/lib/tv_dot_com.rb
ADDED
@@ -0,0 +1,140 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'hpricot'
|
3
|
+
require 'date'
|
4
|
+
require 'open-uri'
|
5
|
+
require 'breakpoint'
|
6
|
+
|
7
|
+
module TvDotCom
|
8
|
+
VERSION = '0.3'
|
9
|
+
|
10
|
+
# A TV show whose season and episode data is scraped from tv.com.
#
# A show is created either from a full URL on a tv.com host or from a show
# name; a name is resolved to a tv.com show id through the site's search page.
class Show
  # The tv.com episode-guide URL that was fetched for this show.
  attr_reader :url
  # The value originally handed to +new+ (a show name or a tv.com URL).
  attr_reader :request

  # Fetches and parses the show's episode guide page.
  #
  # url:: a show name, or a full URL on a tv.com host
  #
  # Raises RuntimeError when a show name cannot be resolved to a show id.
  def initialize(url)
    @request = url
    @url = Show.guides_url(url)
    @doc = Hpricot(open(@url))
  end

  # The name of the show, taken from the guide page heading with the
  # trailing " Episode Guide" removed.
  def name
    @name ||= @doc.search("//h2[@class='mb-10']").first.inner_html.sub(/ Episode Guide/, '')
  end

  # Loads the seasons of this show and returns the Episode at the given
  # position.
  #
  # season:: 1-based season number (anything responding to +to_i+)
  # number:: 1-based episode number within that season
  #
  # Returns nil when either number is below 1 or does not exist.
  def find_episode_by_season_and_number(season, number)
    season_index = season.to_i - 1
    episode_index = number.to_i - 1
    # A negative array index counts from the end, so without this guard
    # season 0 or episode 0 would silently return the last entry.
    return nil if season_index < 0 || episode_index < 0

    wanted_season = seasons[season_index]
    return nil unless wanted_season

    wanted_season.episodes[episode_index]
  end

  # Returns the unprocessed <pre> elements of the epguides.com page for
  # this show.
  # NOTE(review): the URL is built from the original request, so this
  # presumably only works when the show was created from a name rather than
  # from a tv.com URL -- confirm.
  def raw_guide
    @guide ||= Hpricot(open("http://epguides.com/#{@request}/"))/"pre"
  end

  # Returns the episode-guide URL for +name+.
  #
  # A value that already is a URL on a tv.com host is returned unchanged;
  # anything else is treated as a show name and looked up.
  def self.guides_url(name)
    return name if name.match(/^http:\/\/(.*?)\.tv\.com/)

    "http://www.tv.com/show/#{find_show_id(name)}/episode_guide.html&printable=1"
  end

  # Looks up the tv.com show id for +name+ using the site's search page and
  # returns it as a String of digits.
  #
  # Raises RuntimeError when the search fails or yields no usable result.
  def self.find_show_id(name)
    # Turn the name into '+'-separated lower-case search terms:
    # a capital letter starts a new term, underscores separate terms,
    # apostrophes are dropped, every other character becomes a separator and
    # runs of separators are collapsed.
    search = name.gsub(/([A-Z])/) { |capital| '+' + capital.downcase }
    search = search.gsub(/_/, '+').gsub(/'/, '')
    search = search.gsub(/[^+a-z0-9]/, '+').gsub(/\++/, '+')

    begin
      results = Hpricot(open("http://www.tv.com/search.php?type=11&stype=program&qs=#{search}&tag=filter;tv_shows"))
      first_hit = (results/"table[@id='search-results'] span[@class='f-18'] a").first
      first_hit.attributes['href'].match(/show\/(\d+)\/summary/)[1]
    rescue StandardError
      # Network failures and unexpected page layouts are all reported the
      # same way.
      raise "Can't find show id for #{name}"
    end
  end

  # The tv.com id of this show, found by searching for the scraped show name.
  def show_id
    @show_id ||= self.class.find_show_id(name)
  end

  # URL of the show's summary page on tv.com.
  def show_url
    "http://www.tv.com/show/#{show_id}/summary.html"
  end

  # URL of the episode listing page for the given season (default: 1).
  def episode_list_url(season=1)
    "http://www.tv.com/show/#{show_id}/episode_listings.html?season=#{season}&tag=nav_bar;2"
  end

  # Number of seasons, counted from the options of the season drop-down on
  # the episode listing page; never less than 1.
  #
  # NOTE(review): this expression used to end in "|| 2 - 1", which never had
  # any effect because the option count is never nil. If the drop-down also
  # contains a non-season entry this count is one too high -- confirm
  # against the page markup.
  def number_of_seasons
    @number_of_seasons ||= (Hpricot(open(episode_list_url))/"div[@id='season-dropdown'] option").size
    @number_of_seasons = 1 if @number_of_seasons < 1
    @number_of_seasons
  end

  # All seasons of the show as Season objects, numbered from 1.
  def seasons
    @seasons ||= (1..number_of_seasons).collect do |i|
      Season.new(self, i)
    end
  end

end
|
77
|
+
|
78
|
+
# One season of a Show, backed by the tv.com episode listing page for that
# season.
class Season
  # number:: the season number handed to +new+
  # show::   the owning Show
  attr_reader :number, :show

  # show::      the Show this season belongs to; must respond to
  #             +episode_list_url+
  # newnumber:: the season number
  def initialize(show, newnumber)
    @show = show
    @number = newnumber
  end

  # The parsed episode listing page for this season (fetched once, then
  # cached).
  def episode_list
    @episode_list ||= Hpricot(open(show.episode_list_url(number)))
  end

  # Number of episodes in the listing table; 0 when the table has no rows.
  def number_of_episodes
    episode_rows.size
  end

  # The episodes of this season as Episode objects, in table order.
  def episodes
    episode_rows.collect { |row| Episode.new(@show, self, row) }
  end

  private

  # Rows of the listing table with the first row dropped.
  # presumably the first row is the table header -- verify against the page.
  def episode_rows
    rows = (episode_list/"div[@class='table-styled'] table tr").to_a
    # Slicing past the end of an empty array gives nil, hence the fallback.
    rows[1..-1] || []
  end

end
|
101
|
+
|
102
|
+
# A single episode, backed by one row of a tv.com episode listing table.
# Every value is read from that row on demand.
class Episode
  # show::   the Show handed to +new+
  # season:: the Season handed to +new+
  attr_reader :show, :season

  # show::     the owning Show
  # season::   the owning Season
  # raw_html:: the parsed table row describing this episode
  def initialize(show, season, raw_html)
    @show = show
    @season = season
    @raw_html = raw_html
  end

  # The episode title: the text of the row's summary link.
  def title
    title_link_elem.inner_html
  end

  # First whitespace-free token of the row's number cell, as a String.
  def number
    cell_text("td[@class='first f-bold ta-c']")
  end

  # First whitespace-free token of the row's production-code cell, as a
  # String.
  def production_number
    cell_text("td[@class='f-666 f-11 ta-c']")
  end

  # The number of the season this episode belongs to.
  def season_number
    @season.number
  end

  # The href of the row's summary link.
  def url
    title_link_elem.attributes["href"]
  end

  # The air date found in the row as a Date, or nil when the row contains
  # no slash-separated date.
  #
  # NOTE(review): how Date.parse reads an ambiguous slash-separated date
  # (month first or day first) may depend on the Ruby version -- confirm
  # against the date format tv.com serves.
  def first_aired
    found = (@raw_html/"td[@class='ta-c']:empty").inner_html.match(/\d+\/\d+\/\d+/)
    found ? Date.parse(found[0]) : nil
  end

  # The first link in the row whose href contains "summary".
  def title_link_elem
    (@raw_html/"a[@href*=summary]").first
  end

  private

  # Returns the first run of non-whitespace characters inside the cell(s)
  # matched by +selector+ (an empty String when there is none).
  def cell_text(selector)
    (@raw_html/selector).inner_html.match(/\s*(\S*)\s*/)[1]
  end

end
|
140
|
+
end
|
File without changes
|
metadata
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.0
|
3
|
+
specification_version: 1
|
4
|
+
name: tv-dot-com
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: "0.3"
|
7
|
+
date: 2007-04-29 00:00:00 -04:00
|
8
|
+
summary: TV Show and Episode classes scraped from TV.com and Epguide.com useful for fetching meta data on TV Shows.
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: ryand-ruby@zenspider.com
|
12
|
+
homepage: http://www.zenspider.com/ZSS/Products/tv-dot-com/
|
13
|
+
rubyforge_project: tv-dot-com
|
14
|
+
description: The author was too lazy to write a description
|
15
|
+
autorequire:
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Ryan Davis
|
31
|
+
files:
|
32
|
+
- History.txt
|
33
|
+
- Manifest.txt
|
34
|
+
- README.txt
|
35
|
+
- Rakefile
|
36
|
+
- bin/tv_dot_com
|
37
|
+
- lib/tv_dot_com.rb
|
38
|
+
- test/test_tv_dot_com.rb
|
39
|
+
test_files:
|
40
|
+
- test/test_tv_dot_com.rb
|
41
|
+
rdoc_options: []
|
42
|
+
|
43
|
+
extra_rdoc_files: []
|
44
|
+
|
45
|
+
executables:
|
46
|
+
- tv_dot_com
|
47
|
+
extensions: []
|
48
|
+
|
49
|
+
requirements: []
|
50
|
+
|
51
|
+
dependencies:
|
52
|
+
- !ruby/object:Gem::Dependency
|
53
|
+
name: hoe
|
54
|
+
version_requirement:
|
55
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: 1.1.7
|
60
|
+
version:
|