movieDB 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/movieDB/base.rb CHANGED
@@ -3,14 +3,13 @@ require 'MovieDB/status_checker'
3
3
  require 'MovieDB/movie_error'
4
4
 
5
5
  module MovieDB #:nodoc
6
- # MoviesDB v0.1.x is not a datastore gem. Rather, it is a high-level statistical software that performs
7
- # mathematical computations for analyzing film data from imdb.
8
- # In a nut shell, it is a solution to the common problem of deducing logical hypothesis based off data sets.
9
-
6
+ # MovieDB is not a datastore gem. Rather, it is high-level statistical software that performs
7
+ # mathematical computations for analyzing film data from imdb.
8
+ # It is a solution to the common problem of deducing logical hypotheses based on movie data.
10
9
  class Base
11
10
  include StatusChecker
12
11
  include MovieError
13
12
  end
14
-
15
13
  end
16
14
  $:.unshift File.expand_path('..', __FILE__)
15
+
@@ -1,18 +1,15 @@
1
1
  require 'MovieDB'
2
2
 
3
3
  module MovieDB
4
-
5
- ##
6
- # Analysing, inspecting, cleaning, transforming and modeling data.
7
-
4
+ # Analyzing, inspecting, cleaning, transforming and modeling data.
5
+ #
8
6
  class DataAnalysis < MovieDB::Movie
9
7
  module AnalysisOfVariance
10
8
  module LeastSquares
11
9
  module Statistic
12
-
13
10
  def basic_statistic (directory_name)
14
11
  open_spreadsheet(directory_name)
15
-
12
+
16
13
  if check_imdb_count == true
17
14
  puts "*"*41
18
15
  puts "* A minimum of 2 Imdb id's are required *"
@@ -29,76 +26,58 @@ module MovieDB
29
26
  @book = Spreadsheet.open File.join('reports', directory_name)
30
27
  @sheet = @book.worksheet(0)
31
28
 
32
- ##
33
- # Add document formatting
29
+ title_format = Spreadsheet::Format.new :color => :blue, :weight => :bold, :size => 13
34
30
 
35
- title_format = Spreadsheet::Format.new :color => :blue,
36
- :weight => :bold,
37
- :size => 13
38
-
39
- @sheet.column(22).width = "worldwide_gross".length
31
+ @sheet.column(22).width = "worldwide_gross".length
40
32
  end
41
33
 
42
34
  def check_imdb_count
43
35
  @sheet.rows.count - 1 == 1
44
36
  end
45
37
 
46
- def perform_computation
47
-
38
+ def perform_computation
39
+ # Perform computation on the data collected.
40
+ # TODO: Need to use coefficient statistical formula.
41
+ # Calculate median as an example but COD formula must be used.
42
+ # Mean is commonly called the average. Mean or average is defined as the sum of
43
+ # all the given elements divided by the total number of elements.
44
+ #
45
+ # Range is the difference between the highest and the lowest values in a
46
+ # frequency distribution.
47
+ #
48
+ # Mode is the most frequently occurring value in a frequency distribution.
49
+ #
50
+ # Calculate Standard Deviation.
51
+ # Standard deviation is a statistical measure of spread or variability.
52
+ #
53
+ # The standard deviation is the root mean square (RMS) deviation of the
54
+ # values from their arithmetic mean.
48
55
  total_columns = 22
49
56
  @column = []
50
- @row_count = @sheet.rows.count
51
57
 
52
- ##
53
- # Use this total column count to make it dynamic
54
- #total_columns = @column_count = @sheet.column_count
58
+ @row_count = @sheet.rows.count
55
59
 
56
60
  1.upto(total_columns) do |c|
57
- @column = [] # set instance variable to an empty array
58
-
59
- ##
60
- # loop through to collect all elements
61
- # The returned array includes both strings and integers elements
61
+ @column = []
62
62
 
63
63
  @sheet.each_with_index do |row, i|
64
64
  @column << @sheet[i, 0 + c ]
65
65
  end
66
66
 
67
- @column.shift # delete the string header from the array
68
- @column.compact! # delete nil from the array
69
- row_count = @sheet.rows.count
70
-
71
- ##
72
- # Perform computation on the data collected
73
- # TODO: Need to use coefficienct statistical formula
74
- # Calculate median as an example but COD formula must be used
67
+ @column.shift
68
+ @column.compact!
75
69
 
70
+ row_count = @sheet.rows.count
76
71
 
77
- if @column.all? {|i| (1..99999999999).include? (i)}
78
-
72
+ if @column.all? { |i| (1..99999999999).include? (i) }
79
73
  n = @column.count
80
74
  @column.sort!
81
75
 
82
- ##
83
- # Mean is commonly called as average.Mean or Average is defined as the sum of
84
- # all the given elements divided by the total number of elements.
85
- #
86
- # Range is the difference between the highest and the lowest values in a
87
- # frequency distribution.
88
- #
89
- # Mode is the most frequently occurring value in a frequency distribution.
90
-
91
- @mean = @column.sum/n # Find the mean
92
- @range = @column.max - @column.min # Find the range
76
+ @mean = @column.sum / n
77
+ @range = @column.max - @column.min
93
78
 
94
79
  freq = @column.inject(Hash.new(0)) { |h, v| h[v] += 1; h }
95
- @mode = @column.sort_by { |v| freq[v]}.last # Find the mode
96
-
97
- ##
98
- # Calculate Standard Deviation
99
- # Standard deviation is a statistical measure of spread or variability.
100
- # The standard deviation is the root mean square (RMS) deviation of the
101
- # values from their arithmetic mean.
80
+ @mode = @column.sort_by { |v| freq[v] }.last # Find the mode
102
81
 
103
82
  @column_squared = []
104
83
  @column.each do |col|
@@ -107,15 +86,15 @@ module MovieDB
107
86
 
108
87
  @sum_of_column = @column.sum
109
88
  @sum_of_column_squared = @column_squared.sum
110
- @standard_dev = Math.sqrt((@sum_of_column_squared -((@sum_of_column)*(@sum_of_column)/n))/(n-1))
89
+ @standard_dev = Math.sqrt((@sum_of_column_squared - ((@sum_of_column) * (@sum_of_column) / n)) / (n - 1))
111
90
 
112
91
  if n.odd?
113
- index = (n + 1)/2
114
- @median = @column[index - 1] # Subtract -1 to reduce index value since array start with an index 0.
92
+ index = (n + 1) / 2
93
+ @median = @column[index - 1]
115
94
  else
116
- middle_index = n/2
95
+ middle_index = n / 2
117
96
  right_index = middle_index + 1
118
- @median = (@column[middle_index - 1] + @column[right_index - 1])/2
97
+ @median = (@column[middle_index - 1] + @column[right_index - 1]) / 2
119
98
  end
120
99
 
121
100
  else
@@ -126,9 +105,6 @@ module MovieDB
126
105
  @standard_dev = "N/A"
127
106
  end
128
107
 
129
- ##
130
- # Insert results into spreadsheet cell
131
-
132
108
  @sheet[@row_count + 2, 0 ] = "Mean"
133
109
  @sheet[@row_count + 2, 0 + c ] = @mean
134
110
 
@@ -143,7 +119,6 @@ module MovieDB
143
119
 
144
120
  @sheet[@row_count + 6, 0 ] = "Standard Deviation"
145
121
  @sheet[@row_count + 6, 0 + c ] = @standard_dev
146
-
147
122
  end
148
123
  end
149
124
 
@@ -155,13 +130,16 @@ module MovieDB
155
130
  end
156
131
 
157
132
  def insert_data_to_existing_xls_file
158
-
159
133
  filename = ("#{report_name}.xls")
160
134
  @book.write File.join('reports', filename)
161
135
  return filename
162
136
  end
163
137
  end
164
- module Coefficient_Of_Determination; end
138
+
139
+ module Coefficient_Of_Determination
140
+ # TODO: Add code.
141
+ end
142
+
165
143
  module Discrete_Least_Squares_Meshless_Method; end
166
144
  module Explained_Sum_Of_Squares; end
167
145
  module Fraction_Of_Variance_Unexplained; end
@@ -192,12 +170,10 @@ module MovieDB
192
170
  module Variable_Kernel_Density_Estimation; end
193
171
  end
194
172
 
195
- ##
196
- # primarily EDA is for seeing what the data can
197
- # tell us beyond the formal modeling or hypothesis testing task
198
- # The output will be a visual material
199
-
200
173
  module ExploratoryDataAnalysis
174
+ # Primarily, EDA is for seeing what the data can
175
+ # tell us beyond the formal modeling or hypothesis testing task.
176
+ # The output will be visual material.
201
177
  module Data_Reduction; end
202
178
  module Table_Diagonalization; end
203
179
  module Configural_Frequency_Analysis; end
@@ -216,12 +192,12 @@ module MovieDB
216
192
  module RegressionAnalysis
217
193
  module Choice_Modelling; end
218
194
 
219
- module Generalized_Linear_Model
220
- module Binomial_Regression; end
221
- module Generalized_Additive_Model; end
222
- module Linear_Probability_Model; end
223
- module Poisson_Regression; end
224
- module Zero_Inflated_Model; end
195
+ module Generalized_Linear_Model
196
+ module Binomial_Regression; end
197
+ module Generalized_Additive_Model; end
198
+ module Linear_Probability_Model; end
199
+ module Poisson_Regression; end
200
+ module Zero_Inflated_Model; end
225
201
  end
226
202
 
227
203
  module Nonparametric_Regression; end
@@ -251,23 +227,19 @@ module MovieDB
251
227
  end
252
228
  end
253
229
 
254
- ##
255
- #TODO: All Mathetical Calculations go here.
256
-
257
230
  class ExportData
258
231
  def write_spreadsheet (data, data_analysis_name)
259
-
260
232
  begin data_analysis.is_a? String
261
233
  @data_analysis_name = data_analysis_name.split.join.gsub('_', ' ').downcase.to_s
262
234
  case data_analysis_name
263
- when "coefficient of determination"
264
- write_coefficient_of_determination
265
- when "discrete least squares meshless method"
266
- write_discrete_least_squares_meshless_method
267
- when "discrete least squares meshless method"
268
- write_discrete_least_squares_meshless_method
269
- else
270
- end
235
+ when "coefficient of determination"
236
+ write_coefficient_of_determination
237
+ when "discrete least squares meshless method"
238
+ write_discrete_least_squares_meshless_method
239
+ when "discrete least squares meshless method"
240
+ write_discrete_least_squares_meshless_method
241
+ else
242
+ end
271
243
  rescue
272
244
  raise ArgumentError, 'invalid attribute'
273
245
  end
@@ -275,12 +247,10 @@ module MovieDB
275
247
 
276
248
  def write_coefficient_of_determination
277
249
  book = Spreadsheet::Workbook.new
250
+
278
251
  sheet1 = book.create_worksheet name: "Data Analysis: #{@data_analysis_name}"
279
252
  sheet1.row(0).concat %w{title released_date worldwide_gross}
280
253
 
281
- # Loop through the data to collect all values.
282
- # Then values into array
283
-
284
254
  data.each_with_index do |value, index|
285
255
  sheet1[1, index] = "#{value}"
286
256
  end
@@ -1,13 +1,14 @@
1
1
  require "spreadsheet"
2
2
  require "MovieDB"
3
-
3
+
4
4
  # This module will write xls document to file
5
5
  #
6
- # Usage @book = Spreadsheet::Workbook.new
7
-
6
+ # Usage
7
+ #
8
+ # @book = Spreadsheet::Workbook.new
8
9
  module MovieDB
9
10
  class DataExport < MovieDB::Movie
10
- class << self
11
+ class << self
11
12
  #TODO: Check the data analysis(DA) name. Write a define_method and include the DA.
12
13
 
13
14
  def export_movie_data
@@ -17,12 +18,11 @@ module MovieDB
17
18
  end
18
19
 
19
20
  def create_spreadsheet_file
20
- directory_name = ('reports')
21
+ directory_name = 'reports'
21
22
  create_directory(directory_name)
22
23
  Spreadsheet.client_encoding = 'UTF-8'
23
24
  @book = Spreadsheet::Workbook.new
24
- @sheet = @book.create_worksheet name: "Data Analysis: #{$DATA_ANALYSIS_NAME}" # the analysis nameshould be an input
25
-
25
+ @sheet = @book.create_worksheet name: "Data Analysis: #{$DATA_ANALYSIS_NAME}" # the analysis name should be an input
26
26
  end
27
27
 
28
28
  def create_directory(directory_name)
@@ -37,26 +37,25 @@ module MovieDB
37
37
  def create_spreadsheet_header
38
38
  @sheet.row(0).concat $IMDB_ATTRIBUTES_HEADERS
39
39
 
40
- title_format = Spreadsheet::Format.new :color => :blue,
41
- :weight => :bold,
42
- :size => 13
43
-
40
+ title_format = Spreadsheet::Format.new :color => :blue, :weight => :bold, :size => 13
44
41
  float_format = Spreadsheet::Format.new :number_format => "0.00"
45
42
 
46
43
  @sheet.row(0).default_format = title_format
44
+
47
45
  @sheet.column(1).default_format = float_format
48
46
  @sheet.column(16).default_format = float_format
49
47
  @sheet.column(22).default_format = float_format
50
48
  end
51
49
 
52
- # Loop through array of and array imbd data. Each row has the
50
+ # Loop through an array of arrays of IMDb data. Each row has
53
51
  # the information about the film/movie
54
52
  # The Data is obtained from MovieDB::Movie
55
53
  # example
56
- # catching fire |
54
+ #
55
+ # Film: catching fire
57
56
  def create_spreadsheet_body
58
- $IMDB_ATTRIBUTES_HEADERS.each do |header|
59
- case header
57
+ $IMDB_ATTRIBUTES_HEADERS.each do |header|
58
+ case header
60
59
  when 'title' then spreadsheet_body_text_data("title")
61
60
  when 'cast_members' then spreadsheet_body_count_data("cast_members")
62
61
  when 'cast_characters' then spreadsheet_body_count_data("cast_characters")
@@ -86,19 +85,19 @@ module MovieDB
86
85
  end
87
86
 
88
87
  def spreadsheet_body_text_data(header_title)
89
- @e_t = element_title = MovieDB::Movie.instance_eval{filter_movie_attr(header_title)}.flatten
88
+ @e_t = element_title = MovieDB::Movie.instance_eval { filter_movie_attr(header_title) }.flatten
90
89
 
91
90
  element_title.each_with_index do |element2, i|
92
91
  element_array = element_title[(i)].split(' ',)
93
- @sheet.row(1 + i).concat element_array
92
+ @sheet.row(1 + i).concat element_array
94
93
  end
95
94
  end
96
95
 
97
96
  def spreadsheet_body_count_data(header_title)
98
- element_cast = MovieDB::Movie.instance_eval{filter_movie_attr(header_title)}
97
+ element_cast = MovieDB::Movie.instance_eval { filter_movie_attr(header_title) }
99
98
 
100
- 0.upto(@e_t.length - 1) do |i|
101
- element_array = []
99
+ 0.upto(@e_t.length - 1) do |i|
100
+ element_array = []
102
101
 
103
102
  element_array << element_cast[i].length
104
103
  @sheet.row(1 + i).concat element_array
@@ -106,7 +105,7 @@ module MovieDB
106
105
  end
107
106
 
108
107
  def spreadsheet_body_numeric_data(header_title)
109
- @e_t = element_title = MovieDB::Movie.instance_eval{filter_movie_attr(header_title)}
108
+ @e_t = element_title = MovieDB::Movie.instance_eval { filter_movie_attr(header_title) }
110
109
 
111
110
  element_title.each_with_index do |element2, i|
112
111
  element_array = element_title[(i)]
@@ -5,6 +5,7 @@ module MovieDB
5
5
  PATH_AOV = MovieDB::DataAnalysis::AnalysisOfVariance::LeastSquares
6
6
  extend PATH_AOV::Statistic
7
7
  extend PATH_AOV::Coefficient_Of_Determination
8
+
8
9
  include PATH_AOV::Explained_Sum_Of_Squares
9
10
  include PATH_AOV::Fraction_Of_Variance_Unexplained
10
11
  include PATH_AOV::Gauss_Newton_Algorithm
@@ -26,4 +26,3 @@ Talk-Show
26
26
  Thriller
27
27
  War
28
28
  Western
29
-
@@ -1,20 +1,19 @@
1
1
  require 'rubygems'
2
2
  require 'time'
3
-
4
- ##
5
- # Create an actor instance and return the values for
6
- # actor = MovieDB::Actor.instance_eval{create_with_info("Brittany Murphy", "F", "1977-11-10", "2009-12-20")}
7
- # actor = MovieDB::Actor.instance_eval{create_with_info("George Clooney", "M", "1961-05-06", nil)}
8
- # # Name
9
- # # actor_name = actor.map(&:name) #=> ["Brittany Murphy"]
10
- #
11
- # # Alive?
12
- # # actor_name = actor.map(&:alive?) #=> [false, true]
13
- #
14
- # # Age
15
- # # actor_name = actor.map(&:age) #=> [32, 52]
16
- #
17
-
3
+ # Create an actor instance and return the values for the actor variable.
4
+ #
5
+ # actor = MovieDB::Actor.instance_eval{create_with_info("Brittany Murphy", "F", "1977-11-10", "2009-12-20")}
6
+ # actor = MovieDB::Actor.instance_eval{create_with_info("George Clooney", "M", "1961-05-06", nil)}
7
+
8
+ # Example to find the actor name:
9
+ #
10
+ # actor_name = actor.map(&:name) #=> ["Brittany Murphy"]
11
+ #
12
+ # Example to see if an actor is alive:
13
+ # actor_name = actor.map(&:alive?) #=> [false, true]
14
+ #
15
+ # Example to find an actor's age:
16
+ # actor_name = actor.map(&:age) #=> [32, 52]
18
17
  module MovieDB
19
18
  class Person
20
19
  attr_accessor :name, :gender, :birth_date, :death_date, :birthplace
@@ -49,27 +48,28 @@ module MovieDB
49
48
  person.gender = gender
50
49
  person.birth_date = birth_date
51
50
  person.death_date = death_date
51
+
52
52
  return @person_DS << person
53
53
  end
54
54
 
55
55
  def filter_person(attr)
56
56
  attr = attr.to_sym
57
- raise ArgumentError, "#{attr} can only be name or age" if !attr == :age && :name
57
+ raise ArgumentError "#{attr} can only be name or age" if !attr == :age && :name
58
+
58
59
  return @person_DS.select{|s| s.alive?}.map(&attr)
59
60
  end
60
61
 
61
- ##
62
- # Returns a random parameter integer between min to max,
63
- # rather than a float between min to max.(Ruby 2.0.0)
64
- #
65
-
62
+ # Returns a random parameter integer between min to max,
63
+ # rather than a float between min to max.(Ruby 2.0.0)
66
64
  def sample_attr(attr)
67
65
  randgen = Object.new
68
66
  attr_array = self.instance_eval{filter_person(attr)}
69
67
  attr_array.sample(random: randgen)
70
68
  end
71
69
  end
70
+
72
71
  private_class_method :create_with_info, :filter_person
72
+
73
73
  end
74
74
 
75
75
  class Actor < Person
@@ -85,7 +85,8 @@ module MovieDB
85
85
  end
86
86
 
87
87
  def actor_actress_gender(person)
88
- case when person.gender == 'F'
88
+ case
89
+ when person.gender == 'F'
89
90
  return "actress"
90
91
  when person.gender == "M"
91
92
  return "actor"
@@ -95,23 +96,22 @@ module MovieDB
95
96
  end
96
97
 
97
98
  class << self
98
-
99
99
  def filter_actor_alive(attr)
100
100
  attr = attr.to_sym
101
- raise ArgumentError, "#{attr} can only be name or age" if !attr == :age && :name
101
+ raise ArgumentError "#{attr} can only be name or age" if !attr == :age && :name
102
+
102
103
  return @person_DS.select{|s| s.alive?}.map(&"#{attr.to_sym}")
103
104
  end
104
105
 
105
106
  def filter_actor_deceased(actor)
106
- return @person_DS.select{|s| !s.alive?}.map{|m| "#{m.age}"} if attr == "age"
107
- return @person_DS.select{|s| !s.alive?}.map{|m| "#{m.name}"} if attr == "name"
107
+ return @person_DS.select{ |s| !s.alive?}.map{ |m| "#{m.age}" } if attr == "age"
108
+ return @person_DS.select{ |s| !s.alive?}.map{ |m| "#{m.name}" } if attr == "name"
108
109
  end
109
110
 
110
111
  end
111
112
  end
112
113
 
113
114
  class Writer < Person
114
-
115
115
  attr_accessor :published_work
116
116
  alias :published? :published_work
117
117
 
@@ -123,7 +123,6 @@ module MovieDB
123
123
  end
124
124
 
125
125
  class Director < Person
126
-
127
126
  attr_accessor :filmography
128
127
 
129
128
  def initialize(filmography = [])