movieDB 0.3.4 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -6,3 +6,6 @@ Rake::TestTask.new do |t|
6
6
  t.test_files = FileList['test/unit/test*.rb']
7
7
  t.verbose = true
8
8
  end
9
+
10
+ require 'coveralls/rake/task'
11
+ Coveralls::RakeTask.new
@@ -1,23 +1,14 @@
1
- require "rubygems"
2
- require "open-uri" # is an easy-to-use wrapper for net/http, net/https and net/ftp.
3
- require "nokogiri" # is an HTML, XML, SAX, and Reader parser.
4
- require "themoviedb" # Provides a simple, easy to use interface for the Movie Database API.
5
- require "imdb" # Easily use Ruby or the command line to find information on IMDB.com.
6
- require "spreadsheet" # A library designed to read and write Spreadsheet Documents.
7
- require "MovieDB/base"
8
- require "MovieDB/data_analysis"
9
- require "MovieDB/secret"
10
- require "MovieDB/data_export"
11
- require "redis"
12
- require "json"
13
- require 'highline/import'
1
+ require 'MovieDB/base'
2
+ require 'MovieDB/data_store'
3
+ require 'MovieDB/data_analysis/statistics'
4
+ require 'MovieDB/relation/query_methods'
5
+ require 'celluloid/current'
14
6
 
15
- unless defined? MovieDB::Movie
16
- module MovieDB
7
+ module MovieDB
17
8
  # Create a new movie record. The values are stored in the key-value data store.
18
9
  #
19
10
  # Default values are supplied during the instantiation of the class.
20
- # Those values are overriden when you provide one.
11
+ # Those values are overridden when you provide one.
21
12
  #
22
13
  # You can use it like this:
23
14
  #
@@ -26,140 +17,30 @@ unless defined? MovieDB::Movie
26
17
  #
27
18
  # You can raise a MovieError like this:
28
19
  #
29
- # raise MovieError unless Movie.title_present?
30
- class Movie < MovieDB::Base
31
- include StatusChecker
20
+ class Movie < MovieDB::Base
21
+ include Celluloid
22
+ include MovieDB::Relation::QueryMethods
23
+ include MovieDB::DataAnalysis::Statistics
32
24
 
33
- extend MovieDB::DataExport
34
- extend MovieDB::Secret::Lock
25
+ def ids_to_array(ids)
26
+ arr ||= []
35
27
 
36
- const_set("MovieError", Class.new(StandardError))
37
-
38
- attr_accessor :title,
39
- :cast_members,
40
- :cast_characters,
41
- :cast_member_ids,
42
- :cast_members_characters,
43
- :trailer_url,
44
- :director,
45
- :writers,
46
- :filming_locations,
47
- :company,
48
- :genres,
49
- :languages,
50
- :countries,
51
- :length,
52
- :plot,
53
- :poster,
54
- :rating,
55
- :votes,
56
- :mpaa_rating,
57
- :tagline,
58
- :year,
59
- :release_date,
60
- :revenue
61
-
62
- DEFAULT_TITLE = "Method Missing 2: Rails Roars!"
63
- DEFAULT_CAST_MEMBERS = ["David Black", "Paola Perotta", "Obie Fernandez", "David Chelimsky"]
64
- DEFAULT_CAST_CHARACTERS = ["Developer", "Police Officer", "Hunter", "Hostage"]
65
- DEFAULT_CAST_MEMBERS_CHARACTERS = ["David Black => Developer", "Paola Perotta => Police Officer",
66
- "Obie Fernandez =>Hunter", "David Chelimsky =>Hostage"]
67
- DEFAULT_CAST_MEMBER_IDS = ["nm3901234", "nm4901244", "nm5901235", "nm3601266"]
68
- DEFAULT_TRAILER_URL = "http://imdb.com/video/screenplay/vi581042457/"
69
- DEFAULT_DIRECTOR = "Yukihiro 'Matz' Matsumoto"
70
- DEFAULT_WRITERS = 'David Heinemeier Hansson'
71
- DEFAULT_FILMING_LOCATIONS = ["Manhattan, New York, USA"]
72
- DEFAULT_COMPANY = "Open Source Community Film Corporation"
73
- DEFAULT_GENRES = ["Bromantic", "Syfy"]
74
- DEFAULT_LANGUAGES = ["English", "German", "Italian"]
75
- DEFAULT_COUNTRIES = ["USA", "Germany", "Italy"]
76
- DEFAULT_LENGTH = 146
77
- DEFAULT_PLOT = ["David Black, a ruby developer, tries to write his flagship ruby book 'The Well-Grounded Rubyist vol. 186' only to find out that Ruby 9.0.2 and Rails 16.0.3 release dates have been postponed"]
78
- DEFAULT_POSTER = "http://ia.media-imdb.com/images/M/MV5BMTY@@.jpg"
79
- DEFAULT_RATING = 9.9
80
- DEFAULT_VOTES = 110636
81
- DEFAULT_MPAA_RATING = "Rated R for dynamic OOD language usage and private methods access (certificate 33087)"
82
- DEFAULT_TAGLINE = 'Only One MVC Will Rule Them All.'
83
- DEFAULT_YEAR = 2013
84
- DEFAULT_RELEASE_DATE = "11 October 2013 (USA)"
85
- DEFAULT_REVENUE = 456790
86
-
87
- def initialize(attributes = {})
88
- MovieDB::DataExport::IMDB_ATTRIBUTES_HEADERS.each do |attr|
89
- self.send("#{attr}=", (attributes.has_key?(attr.to_sym) ? attributes[attr.to_sym] : self.class.const_get("DEFAULT_#{attr.upcase}")))
90
- end
91
- end
92
-
93
- # Future release of this software will scrap IMDb data from boxofficemojoAPI.com
94
- # https://github.com/skozilla/BoxOfficeMojo/tree/master/boxofficemojoAPI
95
- #
96
- # You can fetch IMDb movie data like this:
97
- # ids = ["2024544", "1800241" ]
98
- #
99
- # MovieDB::Movie.find_imdb_id(ids)
100
- #
101
- # The fetch data is stored in redis for 1800 seconds
102
- # And then written to a xls file.
103
- def self.find_imdb_id(*args)
104
- raise ArgumentError if args.empty?
105
-
106
- get_imdb_movie_data(args)
107
- get_tmdb_movie_data(args)
108
- cache_movie_data_to_redis(args)
109
- write_imdb_data_to_xls
110
- end
111
-
112
- def self.get_imdb_movie_data(*args) #:nodoc:
113
- @imdb_id = []
114
- @imdb_movie_data = []
115
-
116
- args.flatten.each do |value|
117
- @imdb_id << value
118
-
119
- Movie.new # Instantiate a movie object.
120
- @imdb_movie_data << Imdb::Movie.new(value)
121
- end
122
- @imdb_movie_data
28
+ if ids.is_a? String
29
+ arr << ids
123
30
  end
124
31
 
125
- def self.get_tmdb_movie_data(*args) #:nodoc:
126
- @tmdb_movie_data = []
127
- Tmdb::Api.key(Movie.key)
128
-
129
- args.flatten.each do |value|
130
- @tmdb_movie_data << Tmdb::Movie.detail("tt#{value}")
131
- end
132
- @tmdb_movie_data
32
+ if ids.is_a? Numeric
33
+ arr << ids.to_s
133
34
  end
134
35
 
135
- def self.cache_movie_data_to_redis(*args) #:nodoc:
136
- @db_redis ||= Redis.new
137
- @db_redis.del "revenue"
138
-
139
- args.flatten.each_with_index do |value, idx|
140
- imdb_movie_data = @imdb_movie_data[idx]
141
- tmdb_movie_data = @tmdb_movie_data[idx]
142
-
143
- MovieDB::DataExport::IMDB_ATTRIBUTES_HEADERS.each do |attr_key|
144
- begin imdb_movie_data.send(attr_key)
145
- attr_value = imdb_movie_data.send(attr_key)
146
- rescue
147
- attr_value = tmdb_movie_data['revenue']
148
- end
149
-
150
- @db_redis.hset "movie:#{value}", "#{attr_key}", "#{attr_value}" # Adding a hash data type.
151
- @db_redis.lpush "#{attr_key}", "#{attr_value}" if attr_value.is_a? Numeric # Adding a list data type.
152
-
153
- @db_redis.expire "#{attr_key}", 1800
154
- @db_redis.expire "movie:#{value}", 1800
155
- end
36
+ if ids.is_a? Array
37
+ ids.each do |n|
38
+ arr << n
156
39
  end
157
- @db_redis
158
40
  end
159
41
 
160
- def self.write_imdb_data_to_xls #:nodoc:
161
- Movie.export_movie_data(@db_redis, @imdb_id)
162
- end
42
+ return arr.flatten
163
43
  end
164
44
  end
165
45
  end
46
+
@@ -1,14 +1,7 @@
1
- require 'rubygems'
2
- require 'MovieDB/status_checker'
3
- require 'MovieDB/movie_error'
4
-
5
- module MovieDB #:nodoc
6
- # MoviesDB is not a datastore gem. Rather, it is a high-level statistical software that performs
1
+ module MovieDB
2
+ # MovieDB is not a data store gem. Rather, it is high-level statistical software that performs
7
3
  # mathematical computations for analyzing film data from imdb.
8
4
  # It is a solution to the common problem of deducing logical hypotheses based on movie data.
9
- class Base
10
- include StatusChecker
11
- include MovieError
12
- end
5
+ class Base; end
13
6
  end
14
7
  $:.unshift File.expand_path('..', __FILE__)
@@ -0,0 +1,85 @@
1
+ require 'daru'
2
+
3
+ module MovieDB
4
+ module DataAnalysis
5
+ module Statistics
6
+ def numeric_vals
7
+ %w(votes budget rating revenue length year mpaa_rating popularity vote_count vote_average runtime)
8
+ end
9
+
10
+ module_function :numeric_vals
11
+
12
+ stats = [:mean, :std, :sum, :count, :max, :min, :min, :product, :standardize, :describe, :covariance, :correlation, :worksheet]
13
+
14
+ stats.each do |method_name|
15
+ define_method method_name do |**args|
16
+ dataframes_stats(method_name, args)
17
+ end
18
+ end
19
+
20
+ private
21
+
22
+ def dataframes_stats(method, filters = {})
23
+ raise ArgumentError, 'Please provide 2 or more IMDd ids.' if $movie_data.length <= 1
24
+
25
+ @data_key = {}
26
+ @index = []
27
+
28
+ if filters.empty?
29
+ $movie_data.each_with_index do |movie, _|
30
+ value_count = []
31
+
32
+ movie.each_pair do |k, v|
33
+ @data_key[(movie['title'].sub(" ", "_").downcase)] = value_count << (MovieDB::DataAnalysis::Statistics.numeric_vals.any? { |word| word == k } ? v.to_i : v.split(' ').count)
34
+ @index << k.to_sym
35
+ end
36
+ end
37
+ else
38
+ case filters.keys[0]
39
+ when :only
40
+ $movie_data.each_with_index do |movie, _|
41
+ value_count ||= []
42
+
43
+ filters.values.flatten.each do |filter|
44
+
45
+ mr = movie.reject { |k, _| k != filter.to_s }
46
+
47
+ mr.each_pair do |k, v|
48
+ @data_key[(movie['title'].sub(" ", "_").downcase)] = value_count << (MovieDB::DataAnalysis::Statistics.numeric_vals.any? { |word| word == k } ? v.to_i : v.join(' ').split(' ').count)
49
+ @index << k.to_sym
50
+ end
51
+ end
52
+ end
53
+ when :except
54
+ $movie_data.each_with_index do |movie, _|
55
+
56
+ filters.values.flatten.each do |filter|
57
+ mr = movie.reject { |k, _| k == filter.to_s }
58
+ value_count = []
59
+
60
+ mr.each_pair do |k, v|
61
+ @data_key[(movie['title'].sub(" ", "_").downcase)] = value_count << (MovieDB::DataAnalysis::Statistics.numeric_vals.any? { |word| word == k } ? v.to_i : v.join(' ').split(' ').count)
62
+ @index << k.to_sym
63
+ end
64
+ end
65
+ end
66
+ else
67
+ raise ArgumentError, "#{filters.keys[0]} is not a valid filter."
68
+ end
69
+ end
70
+
71
+ index = @index.uniq
72
+
73
+ movie_numeric_vector = Hash[@data_key.map { |k, v| [k.to_s.gsub('-', '_').to_sym, v] }]
74
+ compute_stats(method, movie_numeric_vector, index )
75
+ end
76
+
77
+ def compute_stats(method, movie, index)
78
+ df = Daru::DataFrame.new(movie,
79
+ name: :movie, index: index)
80
+ method == :worksheet ? df : df.send(method)
81
+ end
82
+ end
83
+ end
84
+ end
85
+
@@ -0,0 +1,83 @@
1
+ require 'redis'
2
+
3
+ # Movie data fetched from IMDb is stored as a hash data type in redis.
4
+ module MovieDB
5
+ module DataStore
6
+ # Create a redis instance
7
+ # with timeouts.
8
+ def self.initialize_redis
9
+ @redis_db ||= Redis.new(connect_timeout: 20, timeout: 20)
10
+ end
11
+
12
+ def imdb_methods
13
+ [:title, :also_known_as, :cast_members, :cast_characters, :cast_members_characters,
14
+ :director, :writers, :trailer_url, :genres, :languages, :countries, :length, :company, :plot, :plot_synopsis,
15
+ :plot_summary, :poster, :rating, :votes, :tagline, :mpaa_rating, :year, :release_date, :filming_locations]
16
+ end
17
+
18
+ module_function :imdb_methods
19
+
20
+ # The options hash carries 3 keys:
21
+ # options[:imdb_tmdb], contains the movie data
22
+ # options[:id], contains the IMDb id.
23
+ # options[:expire] contains the expiration time for redis.
24
+ #
25
+ # TMDb returns a status code of 34 if the resource cannot be found.
26
+
27
+ def self.write_data(**options)
28
+ if options[:imdb_tmdb].is_a? Hash
29
+
30
+ options.each_pair do |k, v|
31
+ if v.is_a? Hash
32
+ if v["status_code"] == "34"
33
+ puts "#{options[:id]} is an invalid IMDb id."
34
+ else
35
+ v.each_pair do |j, w|
36
+ @redis_db.hsetnx "#{options[:id]}", "#{j}", "#{w}"
37
+ end
38
+ end
39
+ end
40
+ end
41
+ else
42
+ imdb_methods.each do |method|
43
+ @redis_db.hsetnx "#{options[:imdb_tmdb].id}", method.to_s, "#{options[:imdb_tmdb].send(method)}"
44
+ end
45
+ end
46
+
47
+ @redis_db.expire "#{options[:id]}", "#{options[:expire]}"
48
+ end
49
+
50
+ # You can fetch only one record at a time.
51
+ # Do not send an array of arguments.
52
+ #
53
+ # Example: the following is accepted.
54
+ #
55
+ # MovieDB::Movie.get_data('0369610')
56
+ #
57
+ # Not accepted:
58
+ # MovieDB::Movie.get_data(['0369610', 3079380])
59
+ def self.get_data(method, id = nil)
60
+ initialize_redis
61
+
62
+ case method
63
+ when :all
64
+ return @redis_db.hgetall "#{id}"
65
+ when :hkeys
66
+ return @redis_db.hkeys "#{id}"
67
+ when :hvals
68
+ return @redis_db.hvals "#{id}"
69
+ when :scan
70
+ return @redis_db.scan 0
71
+ when :flushall
72
+ return @redis_db.flushall
73
+ when :get
74
+ return @redis_db.hgetall("#{id}")
75
+ when :ttl
76
+ return @redis_db.ttl("#{id}")
77
+ else
78
+ raise ArgumentError, "The method #{method} is invalid."
79
+ end
80
+ end
81
+ end
82
+ end
83
+
@@ -0,0 +1,139 @@
1
+ require "MovieDB/secret"
2
+ require "MovieDB/data_store"
3
+ require "MovieDB/support/reporting"
4
+ require "themoviedb"
5
+ require "imdb"
6
+
7
+ module MovieDB
8
+ module Relation
9
+ module QueryMethods
10
+ extend MovieDB::Secret
11
+
12
+ # Fetch data from IMDb.
13
+ # Default expiration time for stored object in redis is 1800 seconds.
14
+ # You can set this value to whatever you like.
15
+ #
16
+ # Example:
17
+ #
18
+ # m = MovieDB::Movie.new
19
+ #
20
+ # m.fetch("0369324", "0369662", expire: 84600)
21
+ #
22
+ def fetch(*ids, expire: 1800)
23
+ store_data(ids_to_array(ids), expire)
24
+
25
+ # Collect all fetched data and assign to global variable
26
+ arr = []
27
+
28
+ ids.each do |id|
29
+ arr << (hgetall(id))
30
+ end
31
+
32
+ $movie_data = arr
33
+ end
34
+
35
+ def store_data(ids, expire)
36
+ check_rate_limit(ids)
37
+
38
+ ids.each do |id|
39
+ movie_exists?(id) ? true : imdb_tmdb_lookup(id, expire)
40
+ end
41
+ end
42
+
43
+ # Modifying and manipulating redis objects.
44
+ # Example:
45
+ #
46
+ # m = MovieDB::Movie.new
47
+ #
48
+ # m.fetch("0369610", "3079380", "0478970")
49
+ #
50
+ # m.hgetall("0369610")
51
+ [:all, :hkeys, :hvals, :flushall, :ttl].each do |method_name|
52
+ define_method method_name do |arg|
53
+ MovieDB::DataStore.get_data(method_name, arg)
54
+ end
55
+ end
56
+
57
+ alias hgetall all
58
+
59
+ [:scan, :flushall].each do |method_name|
60
+ define_method method_name do
61
+ mn = MovieDB::DataStore.get_data(method_name)
62
+ mn.flatten.delete_if { |n| n == "0" } if method_name == :scan
63
+ end
64
+ end
65
+
66
+ alias all_ids scan
67
+ alias delete_all flushall
68
+
69
+ def mset(record, id, expire)
70
+ MovieDB::DataStore.write_data(imdb_tmdb: record, id: id, expire: expire)
71
+ end
72
+
73
+ def movie_exists?(id)
74
+ !hgetall(id).empty?
75
+ end
76
+
77
+ # Fetch the movie from both IMDb and TMDb repositories.
78
+ #
79
+ # A future release of this software will scrape IMDb data from boxofficemojoAPI.com
80
+ # using Mechanize gem.
81
+ #
82
+ # Reference https://github.com/skozilla/BoxOfficeMojo/tree/master/boxofficemojoAPI
83
+ # for the api.
84
+ def imdb_tmdb_lookup(id, expire) # :nodoc:
85
+ query_imdb(id, expire)
86
+ query_tmdb(id, expire)
87
+ end
88
+
89
+ def query_imdb(id, expire) # :nodoc:
90
+ # Query IMDb
91
+ imdb = Imdb::Movie.new(id)
92
+
93
+ raise NameError, "#{id} is an invalid IMDb id." if imdb.title.nil?
94
+
95
+ mset(imdb, id, expire)
96
+ end
97
+
98
+ def query_tmdb(id, expire) # :nodoc:
99
+ Tmdb::Api.key(MovieDB::Secret.key)
100
+
101
+ tmdb = Tmdb::Movie.detail("tt#{id}")
102
+
103
+ raise NameError, "#{id} is an invalid TMDb id." if tmdb.nil?
104
+
105
+ mset(tmdb, id, expire)
106
+ end
107
+
108
+ def fetch_data(method, ids = nil)
109
+ if ids.nil?
110
+ MovieDB::DataStore.get_data(method)
111
+ else
112
+ ids.each do |id|
113
+ MovieDB::DataStore.get_data(method, id)
114
+ end
115
+ end
116
+ end
117
+
118
+ private
119
+
120
+ def check_argument(method, ids) # :nodoc:
121
+ if ids.flatten!.empty?
122
+ raise ArgumentError, "The method #{method}() must contain arguments."
123
+ end
124
+ end
125
+
126
+ # IMDb current limits are 40 requests every 10
127
+ # seconds and are limited by IP address, not API key.
128
+ def check_rate_limit(ids)
129
+ if ids.length >= 40
130
+ MovieDB::Support::Reporting.warn(<<-MSG.strip!)
131
+ Reduce the amount of IMDb ids. \nYou have exceeded the rate limit.
132
+ MSG
133
+ else
134
+ MovieDB::Support::Reporting.silenced
135
+ end
136
+ end
137
+ end
138
+ end
139
+ end