imdb_parser 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +8 -0
- data/Gemfile +3 -0
- data/History.txt +74 -0
- data/Manifest.txt +29 -0
- data/README.rdoc +108 -0
- data/Rakefile +35 -0
- data/bin/imdb +10 -0
- data/config/website.yml +2 -0
- data/imdb.gemspec +29 -0
- data/lib/imdb_parser/cli.rb +109 -0
- data/lib/imdb_parser/episode.rb +26 -0
- data/lib/imdb_parser/imdb_base.rb +146 -0
- data/lib/imdb_parser/movie.rb +13 -0
- data/lib/imdb_parser/movie_list.rb +41 -0
- data/lib/imdb_parser/search.rb +46 -0
- data/lib/imdb_parser/season.rb +45 -0
- data/lib/imdb_parser/serie.rb +24 -0
- data/lib/imdb_parser/string_extensions.rb +28 -0
- data/lib/imdb_parser/top_250.rb +10 -0
- data/lib/imdb_parser/version.rb +3 -0
- data/lib/imdb_parser.rb +17 -0
- data/script/console +11 -0
- data/script/destroy +14 -0
- data/script/generate +14 -0
- data/spec/fixtures/search_kannethirey_thondrinal +14 -0
- data/spec/fixtures/search_killed_wife +14 -0
- data/spec/fixtures/search_star_trek +834 -0
- data/spec/fixtures/top_250 +1433 -0
- data/spec/fixtures/tt0036855 +1255 -0
- data/spec/fixtures/tt0083987 +1261 -0
- data/spec/fixtures/tt0095016 +1286 -0
- data/spec/fixtures/tt0110912 +1262 -0
- data/spec/fixtures/tt0111161 +1272 -0
- data/spec/fixtures/tt0117731 +1246 -0
- data/spec/fixtures/tt0166222 +1806 -0
- data/spec/fixtures/tt0242653 +1254 -0
- data/spec/fixtures/tt0330508 +1581 -0
- data/spec/fixtures/tt0468569 +1305 -0
- data/spec/fixtures/tt1401252 +1109 -0
- data/spec/imdb/cli_spec.rb +49 -0
- data/spec/imdb/movie_spec.rb +204 -0
- data/spec/imdb/search_spec.rb +78 -0
- data/spec/imdb/top_250_spec.rb +21 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +57 -0
- data/tasks/fixtures.rake +15 -0
- data/tasks/rspec.rake +21 -0
- metadata +183 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
module Imdb

  # One season of a series, built from the URL of the season's episode list.
  # The page is only fetched and parsed the first time episode data is needed.
  class Season
    attr_accessor :id, :url, :season_number
    # Only the writer is generated here; the reader is the lazy #episodes below.
    attr_writer :episodes

    # url - String URL of the season page. Its trailing digits become the
    #       season number (kept as a String), e.g. "...#season-12" => "12".
    def initialize(url)
      @url = url
      # Take every trailing digit so seasons 10 and above are not truncated to
      # their last digit; fall back to the last character when the URL does
      # not end in a number.
      @season_number = url[/\d+\z/] || url[-1, 1]
      @episodes = []
    end

    # Returns the number of episode nodes found for this season, or nil when
    # fetching or parsing the page raises a StandardError.
    def episode_numbers
      episode_nodes.size
    rescue StandardError
      nil
    end

    # Returns the Imdb::Episode at the given 1-based position in the season.
    def episode(number = 1)
      Imdb::Episode.new(number, episode_nodes[number - 1], self)
    end

    # Returns all episodes of the season. The list is built on the first call
    # and reused afterwards (as long as it is not empty).
    def episodes
      if @episodes.empty?
        episode_nodes.each_with_index do |node_episode, i|
          @episodes << Imdb::Episode.new(i + 1, node_episode, self)
        end
      end
      @episodes
    end

    # Fetches the raw HTML for the season page at +url+ through open-uri.
    # This is deliberately public: #document calls it with an explicit receiver.
    def self.find_by_season(url)
      # Kernel#open no longer fetches URLs on recent Rubies, so prefer
      # URI.open when open-uri provides it and keep the bare call otherwise.
      if defined?(URI) && URI.respond_to?(:open)
        URI.open(url)
      else
        open(url)
      end
    end

    private

    # Returns the nodes that hold this season's episodes.
    # NOTE(review): the selector carries an unbalanced "'" after the season
    # number; it is reproduced unchanged - confirm against the live markup.
    def episode_nodes
      document.search("div div[@class*=season-filter-all filter-season-#{season_number}'] div[@class*=filter-all]")
    end

    # Returns a new Hpricot document for parsing (memoized).
    def document
      @document ||= Hpricot(Imdb::Season.find_by_season(@url))
    end

  end # Season

end # Imdb
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Imdb

  # A series page. Relies on +document+ and +url+, which are not defined in
  # this class (presumably inherited from ImdbBase - confirm).
  #
  #   s = Imdb::Serie.new("0773262")
  #   e = s.seasons.first.episodes.first
  class Serie < ImdbBase

    # Returns how many season links the page contains.
    def number_seasons
      season_link_nodes.size
    end

    # Returns one episode-list URL per season link, or [] when looking the
    # links up or building the URLs raises a StandardError.
    def season_urls
      season_link_nodes.map do |link|
        url.gsub("combined", "") + "episodes#season-" + link.innerHTML.strip.imdb_unescape_html
      end
    rescue StandardError
      []
    end

    # Returns an Imdb::Season for every season URL.
    def seasons
      # The block parameter is not called `url` so it cannot be confused with
      # the series' own url reader.
      season_urls.map { |season_url| Imdb::Season.new(season_url) }
    end

    private

    # Returns the anchor nodes that link to a season's episode list.
    # NOTE(review): the selector ends with an unbalanced "'"; it is reproduced
    # unchanged - confirm against the live markup.
    def season_link_nodes
      document.search("a[@href*=episodes#season']")
    end

  end #Serie

end # Imdb
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
|
3
|
+
module Imdb #:nodoc:
  # Helpers mixed into String for cleaning up scraped HTML text.
  module StringExtensions

    # Returns the string converted from ISO-8859-1 to UTF-8 with HTML
    # entities unescaped.
    def imdb_unescape_html
      if String.method_defined?(:encode)
        CGI::unescapeHTML(self.encode("UTF-8", 'ISO-8859-1'))
      else
        # Rubies without String#encode: load iconv only when it is needed.
        require 'iconv'
        Iconv.conv("UTF-8", 'ISO-8859-1', CGI::unescapeHTML(self))
      end
    end

    # Returns the string with every <...> tag removed.
    def imdb_strip_tags
      gsub(/<\/?[^>]*>/, "")
    end

    # Strips out whitespace then tests if the string is empty.
    # The guard asks String, not this freshly opened module (which never has
    # the method yet), so an existing String#blank? is respected.
    def blank?
      strip.empty?
    end unless String.method_defined?(:blank?)
  end
end

String.send :include, Imdb::StringExtensions
|
data/lib/imdb_parser.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
$:.unshift(File.dirname(__FILE__)) unless
|
2
|
+
$:.include?(File.dirname(__FILE__)) || $:.include?(File.expand_path(File.dirname(__FILE__)))
|
3
|
+
|
4
|
+
require 'open-uri'
|
5
|
+
require 'rubygems'
|
6
|
+
require 'hpricot'
|
7
|
+
|
8
|
+
require 'imdb_parser/imdb_base'
|
9
|
+
require 'imdb_parser/episode'
|
10
|
+
require 'imdb_parser/season'
|
11
|
+
require 'imdb_parser/movie'
|
12
|
+
require 'imdb_parser/serie'
|
13
|
+
require 'imdb_parser/movie_list'
|
14
|
+
require 'imdb_parser/search'
|
15
|
+
require 'imdb_parser/top_250'
|
16
|
+
require 'imdb_parser/string_extensions'
|
17
|
+
require 'imdb_parser/version'
|
data/script/console
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
# File: script/console
# Starts an irb session with tab completion and this gem preloaded.

# Windows builds of Ruby ship irb as a batch file.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
# Load the library relative to this script rather than from an absolute path
# that only exists on one developer's machine.
libs << " -r #{File.expand_path('../lib/imdb_parser.rb', File.dirname(__FILE__))}"
puts "Loading imdb gem"
exec "#{irb} #{libs} --simple-prompt"
|
data/script/destroy
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'rubigen'
|
6
|
+
rescue LoadError
|
7
|
+
require 'rubygems'
|
8
|
+
require 'rubigen'
|
9
|
+
end
|
10
|
+
require 'rubigen/scripts/destroy'
|
11
|
+
|
12
|
+
ARGV.shift if ['--help', '-h'].include?(ARGV[0])
|
13
|
+
RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
|
14
|
+
RubiGen::Scripts::Destroy.new.run(ARGV)
|
data/script/generate
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
APP_ROOT = File.expand_path(File.join(File.dirname(__FILE__), '..'))
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'rubigen'
|
6
|
+
rescue LoadError
|
7
|
+
require 'rubygems'
|
8
|
+
require 'rubigen'
|
9
|
+
end
|
10
|
+
require 'rubigen/scripts/generate'
|
11
|
+
|
12
|
+
ARGV.shift if ['--help', '-h'].include?(ARGV[0])
|
13
|
+
RubiGen::Base.use_component_sources! [:rubygems, :newgem, :newgem_theme, :test_unit]
|
14
|
+
RubiGen::Scripts::Generate.new.run(ARGV)
|
@@ -0,0 +1,14 @@
|
|
1
|
+
HTTP/1.1 302 Found
|
2
|
+
Date: Wed, 14 Sep 2011 19:36:19 GMT
|
3
|
+
Server: Server
|
4
|
+
Cache-Control: private
|
5
|
+
Location: http://akas.imdb.com/title/tt0330508/?fr=c2M9MXxsbT01MDB8ZmI9dXx0dD0xfG14PTIwfGh0bWw9MXxjaD0xfGNvPTF8cG49MHxmdD0xfGt3PTF8cXM9S2FubmV0aGlyZXkgVGhvbmRyaW5hbHxzaXRlPWFrYXxxPUthbm5ldGhpcmV5IFRob25kcmluYWx8bm09MQ__;fc=1;ft=1
|
6
|
+
Cneonction: close
|
7
|
+
Content-Type: text/plain
|
8
|
+
Set-Cookie: uu=BCYv20qm93TIGyXIk82OGarTTY0NVT5VU-kcAuTWuF7jisHqWPSX_CZTPYQ7WQjzVL7ZKWixB0BzsuwCJzcB_MMipd8O954-acpbS8Y3T1M5aXGlGeyywn-2Nuref6-l0K6dSsteRd82ob6x23AoRiwaimQTTxgN_noULvCYOT10nBgmc4vMUx0q2k4P6f0uQLlk_B0ZmA8cgARvhEzG89yJmSkNc3DyBsvYuEhbR55mZDpJkhN8Xu4YdsZ6UpyCfe6G;expires=Thu, 30 Dec 2037 00:00:00 GMT;path=/;domain=.imdb.com
|
9
|
+
Set-Cookie: session-id=349-6028979-0265356;path=/;domain=.imdb.com;expires=Mon, 12 Sep 2016 12:36:19 GMT
|
10
|
+
Set-Cookie: session-id-time=1473708979;path=/;domain=.imdb.com;expires=Mon, 12 Sep 2016 12:36:19 GMT
|
11
|
+
Vary: Accept-Encoding,User-Agent
|
12
|
+
P3P: policyref="http://i.imdb.com/images/p3p.xml",CP="CAO DSP LAW CUR ADM IVAo IVDo CONo OTPo OUR DELi PUBi OTRi BUS PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA HEA PRE LOC GOV OTC "
|
13
|
+
Content-Length: 0
|
14
|
+
|
@@ -0,0 +1,14 @@
|
|
1
|
+
HTTP/1.1 302 Found
|
2
|
+
Date: Wed, 14 Sep 2011 19:36:12 GMT
|
3
|
+
Server: Server
|
4
|
+
Cache-Control: private
|
5
|
+
Location: http://akas.imdb.com/title/tt0166222/?fr=c2M9MXxsbT01MDB8ZmI9dXx0dD0xfG14PTIwfGh0bWw9MXxjaD0xfGNvPTF8cG49MHxmdD0xfGt3PTF8cXM9SSBraWxsZWQgbXkgbGVzYmlhbiB3aWZlfHNpdGU9YWthfHE9SSBraWxsZWQgbXkgbGVzYmlhbiB3aWZlfG5tPTE_;fc=1;ft=7
|
6
|
+
Cneonction: close
|
7
|
+
Content-Type: text/plain
|
8
|
+
Set-Cookie: uu=BCYp98AY1s7aXCCUXayHXaP1anZAIOWkruf4_bRbNI1LmvCib0NkZTf_Dyv1hcYK6kqssUGYU3sL-6vcaGm5YT9z7dWrODyvzjlW3yVi57Lohq9q2ycNs0wSY_a-1pHHPtCE3tyW-8cKs_WLAUOhogR8GnJWoKPz0rHycTek9j6SoyKO2qI0wIrg7EdjaE6cdRlqF9qOH9KEeqcucoVivBS1_BsRHbyzYKreavQ46kGSSn-aRQ1L6b2bPfK5giWlE-eV;expires=Thu, 30 Dec 2037 00:00:00 GMT;path=/;domain=.imdb.com
|
9
|
+
Set-Cookie: session-id=651-6028972-5384590;path=/;domain=.imdb.com;expires=Mon, 12 Sep 2016 12:36:12 GMT
|
10
|
+
Set-Cookie: session-id-time=1473708972;path=/;domain=.imdb.com;expires=Mon, 12 Sep 2016 12:36:12 GMT
|
11
|
+
Vary: Accept-Encoding,User-Agent
|
12
|
+
P3P: policyref="http://i.imdb.com/images/p3p.xml",CP="CAO DSP LAW CUR ADM IVAo IVDo CONo OTPo OUR DELi PUBi OTRi BUS PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA HEA PRE LOC GOV OTC "
|
13
|
+
Content-Length: 0
|
14
|
+
|