imdb_parser 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. data/.gitignore +8 -0
  2. data/Gemfile +3 -0
  3. data/History.txt +74 -0
  4. data/Manifest.txt +29 -0
  5. data/README.rdoc +108 -0
  6. data/Rakefile +35 -0
  7. data/bin/imdb +10 -0
  8. data/config/website.yml +2 -0
  9. data/imdb.gemspec +29 -0
  10. data/lib/imdb_parser/cli.rb +109 -0
  11. data/lib/imdb_parser/episode.rb +26 -0
  12. data/lib/imdb_parser/imdb_base.rb +146 -0
  13. data/lib/imdb_parser/movie.rb +13 -0
  14. data/lib/imdb_parser/movie_list.rb +41 -0
  15. data/lib/imdb_parser/search.rb +46 -0
  16. data/lib/imdb_parser/season.rb +45 -0
  17. data/lib/imdb_parser/serie.rb +24 -0
  18. data/lib/imdb_parser/string_extensions.rb +28 -0
  19. data/lib/imdb_parser/top_250.rb +10 -0
  20. data/lib/imdb_parser/version.rb +3 -0
  21. data/lib/imdb_parser.rb +17 -0
  22. data/script/console +11 -0
  23. data/script/destroy +14 -0
  24. data/script/generate +14 -0
  25. data/spec/fixtures/search_kannethirey_thondrinal +14 -0
  26. data/spec/fixtures/search_killed_wife +14 -0
  27. data/spec/fixtures/search_star_trek +834 -0
  28. data/spec/fixtures/top_250 +1433 -0
  29. data/spec/fixtures/tt0036855 +1255 -0
  30. data/spec/fixtures/tt0083987 +1261 -0
  31. data/spec/fixtures/tt0095016 +1286 -0
  32. data/spec/fixtures/tt0110912 +1262 -0
  33. data/spec/fixtures/tt0111161 +1272 -0
  34. data/spec/fixtures/tt0117731 +1246 -0
  35. data/spec/fixtures/tt0166222 +1806 -0
  36. data/spec/fixtures/tt0242653 +1254 -0
  37. data/spec/fixtures/tt0330508 +1581 -0
  38. data/spec/fixtures/tt0468569 +1305 -0
  39. data/spec/fixtures/tt1401252 +1109 -0
  40. data/spec/imdb/cli_spec.rb +49 -0
  41. data/spec/imdb/movie_spec.rb +204 -0
  42. data/spec/imdb/search_spec.rb +78 -0
  43. data/spec/imdb/top_250_spec.rb +21 -0
  44. data/spec/spec.opts +1 -0
  45. data/spec/spec_helper.rb +57 -0
  46. data/tasks/fixtures.rake +15 -0
  47. data/tasks/rspec.rake +21 -0
  48. metadata +183 -0
@@ -0,0 +1,45 @@
1
module Imdb

  # One season of a series, scraped from the season's episode-list URL.
  class Season
    attr_accessor :id, :url, :season_number
    # The reader for +episodes+ is defined explicitly below (it lazily fills
    # the list), so only the writer is generated here.
    attr_writer :episodes

    # url:: address of the season page. The season number is read from the
    #       digits at the very end of the URL (e.g. "...episodes#season-12").
    def initialize(url)
      @url = url
      # Take *all* trailing digits so seasons 10 and above are not truncated
      # to their last digit; fall back to the last character (the previous
      # behaviour) when the URL does not end in a digit.
      @season_number = url[/\d+\z/] || url[-1, 1]
      @episodes = []
    end

    # Returns the number of episode nodes found for this season, or nil when
    # fetching/parsing raises a StandardError.
    def episode_numbers
      episode_nodes.size
    rescue StandardError
      nil
    end

    # Builds the Imdb::Episode for the given 1-based episode +number+.
    def episode(number = 1)
      Imdb::Episode.new(number, episode_nodes[number - 1], self)
    end

    # Returns every episode of the season. The list is built on first use and
    # cached in @episodes afterwards.
    def episodes
      if @episodes.empty?
        episode_nodes.each_with_index do |node_episode, index|
          @episodes << Imdb::Episode.new(index + 1, node_episode, self)
        end
      end
      @episodes
    end

    # Fetches the raw page for +url+ through Kernel#open (URL support comes
    # from open-uri, which the gem's entry file requires).
    # Stays public: a bare +private+ does not affect +def self.+ methods, and
    # this method was public before.
    def self.find_by_season(url)
      open(url)
    end

    private

    # Returns the memoized Hpricot document for this season's page.
    def document
      @document ||= Hpricot(Imdb::Season.find_by_season(@url))
    end

    # Episode nodes of this season inside the parsed document.
    # NOTE(review): the selector contains an unmatched "'" after the season
    # number; it is kept exactly as it was -- confirm how Hpricot treats it.
    def episode_nodes
      document.search("div div[@class*=season-filter-all filter-season-#{season_number}'] div[@class*=filter-all]")
    end

  end # Season

end # Imdb
@@ -0,0 +1,24 @@
1
module Imdb

  # A series page. Relies on +document+ and +url+, which are not defined in
  # this class (presumably inherited from ImdbBase -- confirm there).
  #
  # Usage:
  #   s = Imdb::Serie.new("0773262")
  #   e = s.seasons.first.episodes.first
  class Serie < ImdbBase

    # Number of season links found on the page.
    def number_seasons
      season_links.size
    end

    # Builds one episode-list URL per season link: the page URL with
    # "combined" removed, followed by "episodes#season-" and the link text.
    # Returns [] when anything raises a StandardError.
    def season_urls
      season_links.map do |link|
        url.gsub("combined", "") + "episodes#season-" + link.innerHTML.strip.imdb_unescape_html
      end
    rescue StandardError
      []
    end

    # Returns a freshly built array with one Imdb::Season per season URL.
    def seasons
      season_urls.map { |season_url| Imdb::Season.new(season_url) }
    end

    private

    # Anchor nodes whose href points at a season of the episode list.
    # NOTE(review): the selector ends with an unmatched "'"; kept unchanged --
    # confirm how Hpricot treats it.
    def season_links
      document.search("a[@href*=episodes#season']")
    end

  end # Serie
end # Imdb
@@ -0,0 +1,28 @@
1
+ require 'cgi'
2
+
3
module Imdb #:nodoc:
  # Helper methods mixed into String (see the include at the end).
  module StringExtensions

    # Returns the string re-encoded from ISO-8859-1 to UTF-8 with HTML
    # entities unescaped. Uses String#encode when String provides it and
    # falls back to Iconv otherwise.
    def imdb_unescape_html
      if String.method_defined?(:encode)
        CGI.unescapeHTML(encode('UTF-8', 'ISO-8859-1'))
      else
        # Loaded lazily on purpose: only needed when String#encode is missing.
        require 'iconv'
        Iconv.conv('UTF-8', 'ISO-8859-1', CGI.unescapeHTML(self))
      end
    end

    # Returns a copy of the string with everything between "<" and ">"
    # (opening and closing tags) removed.
    def imdb_strip_tags
      gsub(/<\/?[^>]*>/, '')
    end

    # Only add blank? when String does not already respond to it. The check
    # must be made against String: asking this module (as before) is always
    # false at this point, so it never guarded anything.
    unless String.method_defined?(:blank?)
      # Strips out whitespace then tests if the string is empty.
      def blank?
        strip.empty?
      end
    end
  end
end

String.send :include, Imdb::StringExtensions
@@ -0,0 +1,10 @@
1
module Imdb

  # Movie list whose document is the chart page at
  # http://akas.imdb.com/chart/top.
  class Top250 < MovieList
    private

    # Fetches and parses the chart page on first use; later calls return the
    # document cached in @document.
    def document
      @document ||= begin
        chart_page = open("http://akas.imdb.com/chart/top")
        Hpricot(chart_page)
      end
    end
  end # Top250

end # Imdb
@@ -0,0 +1,3 @@
1
module Imdb
  # Version string of the library. Frozen so it cannot be mutated in place.
  # NOTE(review): the package this file ships in is labelled 0.6.6 -- confirm
  # whether this constant is meant to track the released gem version.
  VERSION = '1.1.0'.freeze
end
@@ -0,0 +1,17 @@
1
# Put this file's directory at the front of the load path unless it is already
# listed, either as-is or in its expanded (absolute) form.
lib_dir = File.dirname(__FILE__)
already_listed = $:.include?(lib_dir) || $:.include?(File.expand_path(lib_dir))
$:.unshift(lib_dir) unless already_listed
3
+
4
+ require 'open-uri'
5
+ require 'rubygems'
6
+ require 'hpricot'
7
+
8
+ require 'imdb_parser/imdb_base'
9
+ require 'imdb_parser/episode'
10
+ require 'imdb_parser/season'
11
+ require 'imdb_parser/movie'
12
+ require 'imdb_parser/serie'
13
+ require 'imdb_parser/movie_list'
14
+ require 'imdb_parser/search'
15
+ require 'imdb_parser/top_250'
16
+ require 'imdb_parser/string_extensions'
17
+ require 'imdb_parser/version'
data/script/console ADDED
@@ -0,0 +1,11 @@
1
#!/usr/bin/env ruby
# File: script/console
# Starts an IRB session with tab completion and this gem's library preloaded.

# Pick the Windows batch wrapper when RUBY_PLATFORM names mswin or mingw.
irb = RUBY_PLATFORM =~ /(?:mswin|mingw)/ ? 'irb.bat' : 'irb'

# Resolve the library entry file relative to this script instead of relying on
# an absolute path that only exists on one developer's machine.
lib_entry = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib', 'imdb_parser.rb'))

libs = " -r irb/completion"
# Perhaps use a console_lib to store any extra methods I may want available in the console
# libs << " -r #{File.dirname(__FILE__) + '/../lib/console_lib/console_logger.rb'}"
libs << " -r #{lib_entry}"
puts "Loading imdb gem"
# exec replaces this process with irb; nothing after this line runs.
exec "#{irb} #{libs} --simple-prompt"
data/script/destroy ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs RubiGen's destroy script with the project root one level above this file.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

begin
  require 'rubigen'
rescue LoadError
  # Retry after loading RubyGems, in case rubigen is only reachable through it.
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# Drop a leading help flag before handing the arguments to RubiGen.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Destroy.new.run(ARGV)
data/script/generate ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs RubiGen's generate script with the project root one level above this file.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

begin
  require 'rubigen'
rescue LoadError
  # Retry after loading RubyGems, in case rubigen is only reachable through it.
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# Drop a leading help flag before handing the arguments to RubiGen.
help_flags = ['--help', '-h']
ARGV.shift if help_flags.include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Generate.new.run(ARGV)
@@ -0,0 +1,14 @@
1
+ HTTP/1.1 302 Found
2
+ Date: Wed, 14 Sep 2011 19:36:19 GMT
3
+ Server: Server
4
+ Cache-Control: private
5
+ Location: http://akas.imdb.com/title/tt0330508/?fr=c2M9MXxsbT01MDB8ZmI9dXx0dD0xfG14PTIwfGh0bWw9MXxjaD0xfGNvPTF8cG49MHxmdD0xfGt3PTF8cXM9S2FubmV0aGlyZXkgVGhvbmRyaW5hbHxzaXRlPWFrYXxxPUthbm5ldGhpcmV5IFRob25kcmluYWx8bm09MQ__;fc=1;ft=1
6
+ Cneonction: close
7
+ Content-Type: text/plain
8
+ Set-Cookie: uu=BCYv20qm93TIGyXIk82OGarTTY0NVT5VU-kcAuTWuF7jisHqWPSX_CZTPYQ7WQjzVL7ZKWixB0BzsuwCJzcB_MMipd8O954-acpbS8Y3T1M5aXGlGeyywn-2Nuref6-l0K6dSsteRd82ob6x23AoRiwaimQTTxgN_noULvCYOT10nBgmc4vMUx0q2k4P6f0uQLlk_B0ZmA8cgARvhEzG89yJmSkNc3DyBsvYuEhbR55mZDpJkhN8Xu4YdsZ6UpyCfe6G;expires=Thu, 30 Dec 2037 00:00:00 GMT;path=/;domain=.imdb.com
9
+ Set-Cookie: session-id=349-6028979-0265356;path=/;domain=.imdb.com;expires=Mon, 12 Sep 2016 12:36:19 GMT
10
+ Set-Cookie: session-id-time=1473708979;path=/;domain=.imdb.com;expires=Mon, 12 Sep 2016 12:36:19 GMT
11
+ Vary: Accept-Encoding,User-Agent
12
+ P3P: policyref="http://i.imdb.com/images/p3p.xml",CP="CAO DSP LAW CUR ADM IVAo IVDo CONo OTPo OUR DELi PUBi OTRi BUS PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA HEA PRE LOC GOV OTC "
13
+ Content-Length: 0
14
+
@@ -0,0 +1,14 @@
1
+ HTTP/1.1 302 Found
2
+ Date: Wed, 14 Sep 2011 19:36:12 GMT
3
+ Server: Server
4
+ Cache-Control: private
5
+ Location: http://akas.imdb.com/title/tt0166222/?fr=c2M9MXxsbT01MDB8ZmI9dXx0dD0xfG14PTIwfGh0bWw9MXxjaD0xfGNvPTF8cG49MHxmdD0xfGt3PTF8cXM9SSBraWxsZWQgbXkgbGVzYmlhbiB3aWZlfHNpdGU9YWthfHE9SSBraWxsZWQgbXkgbGVzYmlhbiB3aWZlfG5tPTE_;fc=1;ft=7
6
+ Cneonction: close
7
+ Content-Type: text/plain
8
+ Set-Cookie: uu=BCYp98AY1s7aXCCUXayHXaP1anZAIOWkruf4_bRbNI1LmvCib0NkZTf_Dyv1hcYK6kqssUGYU3sL-6vcaGm5YT9z7dWrODyvzjlW3yVi57Lohq9q2ycNs0wSY_a-1pHHPtCE3tyW-8cKs_WLAUOhogR8GnJWoKPz0rHycTek9j6SoyKO2qI0wIrg7EdjaE6cdRlqF9qOH9KEeqcucoVivBS1_BsRHbyzYKreavQ46kGSSn-aRQ1L6b2bPfK5giWlE-eV;expires=Thu, 30 Dec 2037 00:00:00 GMT;path=/;domain=.imdb.com
9
+ Set-Cookie: session-id=651-6028972-5384590;path=/;domain=.imdb.com;expires=Mon, 12 Sep 2016 12:36:12 GMT
10
+ Set-Cookie: session-id-time=1473708972;path=/;domain=.imdb.com;expires=Mon, 12 Sep 2016 12:36:12 GMT
11
+ Vary: Accept-Encoding,User-Agent
12
+ P3P: policyref="http://i.imdb.com/images/p3p.xml",CP="CAO DSP LAW CUR ADM IVAo IVDo CONo OTPo OUR DELi PUBi OTRi BUS PHY ONL UNI PUR FIN COM NAV INT DEM CNT STA HEA PRE LOC GOV OTC "
13
+ Content-Length: 0
14
+