movieDB 0.2.2 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
data/lib/movieDB/base.rb CHANGED
@@ -3,14 +3,13 @@ require 'MovieDB/status_checker'
3
3
  require 'MovieDB/movie_error'
4
4
 
5
5
  module MovieDB #:nodoc
6
- # MoviesDB v0.1.x is not a datastore gem. Rather, it is a high-level statistical software that performs
7
- # mathematical computations for analyzing film data from imdb.
8
- # In a nut shell, it is a solution to the common problem of deducing logical hypothesis based off data sets.
9
-
6
+ # MovieDB is not a datastore gem. Rather, it is high-level statistical software that performs
7
+ # mathematical computations for analyzing film data from imdb.
8
+ # It is a solution to the common problem of deducing logical hypotheses from movie data.
10
9
  class Base
11
10
  include StatusChecker
12
11
  include MovieError
13
12
  end
14
-
15
13
  end
16
14
  $:.unshift File.expand_path('..', __FILE__)
15
+
@@ -1,18 +1,15 @@
1
1
  require 'MovieDB'
2
2
 
3
3
  module MovieDB
4
-
5
- ##
6
- # Analysing, inspecting, cleaning, transforming and modeling data.
7
-
4
+ # Analyzing, inspecting, cleaning, transforming and modeling data.
5
+ #
8
6
  class DataAnalysis < MovieDB::Movie
9
7
  module AnalysisOfVariance
10
8
  module LeastSquares
11
9
  module Statistic
12
-
13
10
  def basic_statistic (directory_name)
14
11
  open_spreadsheet(directory_name)
15
-
12
+
16
13
  if check_imdb_count == true
17
14
  puts "*"*41
18
15
  puts "* A minimum of 2 Imdb id's are required *"
@@ -29,76 +26,58 @@ module MovieDB
29
26
  @book = Spreadsheet.open File.join('reports', directory_name)
30
27
  @sheet = @book.worksheet(0)
31
28
 
32
- ##
33
- # Add document formatting
29
+ title_format = Spreadsheet::Format.new :color => :blue, :weight => :bold, :size => 13
34
30
 
35
- title_format = Spreadsheet::Format.new :color => :blue,
36
- :weight => :bold,
37
- :size => 13
38
-
39
- @sheet.column(22).width = "worldwide_gross".length
31
+ @sheet.column(22).width = "worldwide_gross".length
40
32
  end
41
33
 
42
34
  def check_imdb_count
43
35
  @sheet.rows.count - 1 == 1
44
36
  end
45
37
 
46
- def perform_computation
47
-
38
+ def perform_computation
39
+ # Perform computation on the data collected.
40
+ # TODO: Need to use coefficient statistical formula.
41
+ # Calculate median as an example but COD formula must be used.
42
+ # The mean is commonly called the average. The mean or average is defined as the sum of
43
+ # all the given elements divided by the total number of elements.
44
+ #
45
+ # Range is the difference between the highest and the lowest values in a
46
+ # frequency distribution.
47
+ #
48
+ # Mode is the most frequently occurring value in a frequency distribution.
49
+ #
50
+ # Calculate Standard Deviation.
51
+ # Standard deviation is a statistical measure of spread or variability.
52
+ #
53
+ # The standard deviation is the root mean square (RMS) deviation of the
54
+ # values from their arithmetic mean.
48
55
  total_columns = 22
49
56
  @column = []
50
- @row_count = @sheet.rows.count
51
57
 
52
- ##
53
- # Use this total column count to make it dynamic
54
- #total_columns = @column_count = @sheet.column_count
58
+ @row_count = @sheet.rows.count
55
59
 
56
60
  1.upto(total_columns) do |c|
57
- @column = [] # set instance variable to an empty array
58
-
59
- ##
60
- # loop through to collect all elements
61
- # The returned array includes both strings and integers elements
61
+ @column = []
62
62
 
63
63
  @sheet.each_with_index do |row, i|
64
64
  @column << @sheet[i, 0 + c ]
65
65
  end
66
66
 
67
- @column.shift # delete the string header from the array
68
- @column.compact! # delete nil from the array
69
- row_count = @sheet.rows.count
70
-
71
- ##
72
- # Perform computation on the data collected
73
- # TODO: Need to use coefficienct statistical formula
74
- # Calculate median as an example but COD formula must be used
67
+ @column.shift
68
+ @column.compact!
75
69
 
70
+ row_count = @sheet.rows.count
76
71
 
77
- if @column.all? {|i| (1..99999999999).include? (i)}
78
-
72
+ if @column.all? { |i| (1..99999999999).include? (i) }
79
73
  n = @column.count
80
74
  @column.sort!
81
75
 
82
- ##
83
- # Mean is commonly called as average.Mean or Average is defined as the sum of
84
- # all the given elements divided by the total number of elements.
85
- #
86
- # Range is the difference between the highest and the lowest values in a
87
- # frequency distribution.
88
- #
89
- # Mode is the most frequently occurring value in a frequency distribution.
90
-
91
- @mean = @column.sum/n # Find the mean
92
- @range = @column.max - @column.min # Find the range
76
+ @mean = @column.sum / n
77
+ @range = @column.max - @column.min
93
78
 
94
79
  freq = @column.inject(Hash.new(0)) { |h, v| h[v] += 1; h }
95
- @mode = @column.sort_by { |v| freq[v]}.last # Find the mode
96
-
97
- ##
98
- # Calculate Standard Deviation
99
- # Standard deviation is a statistical measure of spread or variability.
100
- # The standard deviation is the root mean square (RMS) deviation of the
101
- # values from their arithmetic mean.
80
+ @mode = @column.sort_by { |v| freq[v] }.last # Find the mode
102
81
 
103
82
  @column_squared = []
104
83
  @column.each do |col|
@@ -107,15 +86,15 @@ module MovieDB
107
86
 
108
87
  @sum_of_column = @column.sum
109
88
  @sum_of_column_squared = @column_squared.sum
110
- @standard_dev = Math.sqrt((@sum_of_column_squared -((@sum_of_column)*(@sum_of_column)/n))/(n-1))
89
+ @standard_dev = Math.sqrt((@sum_of_column_squared - ((@sum_of_column) * (@sum_of_column) / n)) / (n - 1))
111
90
 
112
91
  if n.odd?
113
- index = (n + 1)/2
114
- @median = @column[index - 1] # Subtract -1 to reduce index value since array start with an index 0.
92
+ index = (n + 1) / 2
93
+ @median = @column[index - 1]
115
94
  else
116
- middle_index = n/2
95
+ middle_index = n / 2
117
96
  right_index = middle_index + 1
118
- @median = (@column[middle_index - 1] + @column[right_index - 1])/2
97
+ @median = (@column[middle_index - 1] + @column[right_index - 1]) / 2
119
98
  end
120
99
 
121
100
  else
@@ -126,9 +105,6 @@ module MovieDB
126
105
  @standard_dev = "N/A"
127
106
  end
128
107
 
129
- ##
130
- # Insert results into spreadsheet cell
131
-
132
108
  @sheet[@row_count + 2, 0 ] = "Mean"
133
109
  @sheet[@row_count + 2, 0 + c ] = @mean
134
110
 
@@ -143,7 +119,6 @@ module MovieDB
143
119
 
144
120
  @sheet[@row_count + 6, 0 ] = "Standard Deviation"
145
121
  @sheet[@row_count + 6, 0 + c ] = @standard_dev
146
-
147
122
  end
148
123
  end
149
124
 
@@ -155,13 +130,16 @@ module MovieDB
155
130
  end
156
131
 
157
132
  def insert_data_to_existing_xls_file
158
-
159
133
  filename = ("#{report_name}.xls")
160
134
  @book.write File.join('reports', filename)
161
135
  return filename
162
136
  end
163
137
  end
164
- module Coefficient_Of_Determination; end
138
+
139
+ module Coefficient_Of_Determination
140
+ # TODO: Add code.
141
+ end
142
+
165
143
  module Discrete_Least_Squares_Meshless_Method; end
166
144
  module Explained_Sum_Of_Squares; end
167
145
  module Fraction_Of_Variance_Unexplained; end
@@ -192,12 +170,10 @@ module MovieDB
192
170
  module Variable_Kernel_Density_Estimation; end
193
171
  end
194
172
 
195
- ##
196
- # primarily EDA is for seeing what the data can
197
- # tell us beyond the formal modeling or hypothesis testing task
198
- # The output will be a visual material
199
-
200
173
  module ExploratoryDataAnalysis
174
+ # EDA is primarily for seeing what the data can
175
+ # tell us beyond the formal modeling or hypothesis testing task.
176
+ # The output will be visual material.
201
177
  module Data_Reduction; end
202
178
  module Table_Diagonalization; end
203
179
  module Configural_Frequency_Analysis; end
@@ -216,12 +192,12 @@ module MovieDB
216
192
  module RegressionAnalysis
217
193
  module Choice_Modelling; end
218
194
 
219
- module Generalized_Linear_Model
220
- module Binomial_Regression; end
221
- module Generalized_Additive_Model; end
222
- module Linear_Probability_Model; end
223
- module Poisson_Regression; end
224
- module Zero_Inflated_Model; end
195
+ module Generalized_Linear_Model
196
+ module Binomial_Regression; end
197
+ module Generalized_Additive_Model; end
198
+ module Linear_Probability_Model; end
199
+ module Poisson_Regression; end
200
+ module Zero_Inflated_Model; end
225
201
  end
226
202
 
227
203
  module Nonparametric_Regression; end
@@ -251,23 +227,19 @@ module MovieDB
251
227
  end
252
228
  end
253
229
 
254
- ##
255
- #TODO: All Mathetical Calculations go here.
256
-
257
230
  class ExportData
258
231
  def write_spreadsheet (data, data_analysis_name)
259
-
260
232
  begin data_analysis.is_a? String
261
233
  @data_analysis_name = data_analysis_name.split.join.gsub('_', ' ').downcase.to_s
262
234
  case data_analysis_name
263
- when "coefficient of determination"
264
- write_coefficient_of_determination
265
- when "discrete least squares meshless method"
266
- write_discrete_least_squares_meshless_method
267
- when "discrete least squares meshless method"
268
- write_discrete_least_squares_meshless_method
269
- else
270
- end
235
+ when "coefficient of determination"
236
+ write_coefficient_of_determination
237
+ when "discrete least squares meshless method"
238
+ write_discrete_least_squares_meshless_method
239
+ when "discrete least squares meshless method"
240
+ write_discrete_least_squares_meshless_method
241
+ else
242
+ end
271
243
  rescue
272
244
  raise ArgumentError, 'invalid attribute'
273
245
  end
@@ -275,12 +247,10 @@ module MovieDB
275
247
 
276
248
  def write_coefficient_of_determination
277
249
  book = Spreadsheet::Workbook.new
250
+
278
251
  sheet1 = book.create_worksheet name: "Data Analysis: #{@data_analysis_name}"
279
252
  sheet1.row(0).concat %w{title released_date worldwide_gross}
280
253
 
281
- # Loop through the data to collect all values.
282
- # Then values into array
283
-
284
254
  data.each_with_index do |value, index|
285
255
  sheet1[1, index] = "#{value}"
286
256
  end
@@ -1,13 +1,14 @@
1
1
  require "spreadsheet"
2
2
  require "MovieDB"
3
-
3
+
4
4
  # This module writes an xls document to a file.
5
5
  #
6
- # Usage @book = Spreadsheet::Workbook.new
7
-
6
+ # Usage
7
+ #
8
+ # @book = Spreadsheet::Workbook.new
8
9
  module MovieDB
9
10
  class DataExport < MovieDB::Movie
10
- class << self
11
+ class << self
11
12
  #TODO: Check the data analysis(DA) name. Write a define_method and include the DA.
12
13
 
13
14
  def export_movie_data
@@ -17,12 +18,11 @@ module MovieDB
17
18
  end
18
19
 
19
20
  def create_spreadsheet_file
20
- directory_name = ('reports')
21
+ directory_name = 'reports'
21
22
  create_directory(directory_name)
22
23
  Spreadsheet.client_encoding = 'UTF-8'
23
24
  @book = Spreadsheet::Workbook.new
24
- @sheet = @book.create_worksheet name: "Data Analysis: #{$DATA_ANALYSIS_NAME}" # the analysis nameshould be an input
25
-
25
+ @sheet = @book.create_worksheet name: "Data Analysis: #{$DATA_ANALYSIS_NAME}" # the analysis name should be an input
26
26
  end
27
27
 
28
28
  def create_directory(directory_name)
@@ -37,26 +37,25 @@ module MovieDB
37
37
  def create_spreadsheet_header
38
38
  @sheet.row(0).concat $IMDB_ATTRIBUTES_HEADERS
39
39
 
40
- title_format = Spreadsheet::Format.new :color => :blue,
41
- :weight => :bold,
42
- :size => 13
43
-
40
+ title_format = Spreadsheet::Format.new :color => :blue, :weight => :bold, :size => 13
44
41
  float_format = Spreadsheet::Format.new :number_format => "0.00"
45
42
 
46
43
  @sheet.row(0).default_format = title_format
44
+
47
45
  @sheet.column(1).default_format = float_format
48
46
  @sheet.column(16).default_format = float_format
49
47
  @sheet.column(22).default_format = float_format
50
48
  end
51
49
 
52
- # Loop through array of and array imbd data. Each row has the
50
+ # Loop through an array of arrays of IMDb data. Each row has
53
51
  # the information about the film/movie
54
52
  # The Data is obtained from MovieDB::Movie
55
53
  # example
56
- # catching fire |
54
+ #
55
+ # Film: catching fire
57
56
  def create_spreadsheet_body
58
- $IMDB_ATTRIBUTES_HEADERS.each do |header|
59
- case header
57
+ $IMDB_ATTRIBUTES_HEADERS.each do |header|
58
+ case header
60
59
  when 'title' then spreadsheet_body_text_data("title")
61
60
  when 'cast_members' then spreadsheet_body_count_data("cast_members")
62
61
  when 'cast_characters' then spreadsheet_body_count_data("cast_characters")
@@ -86,19 +85,19 @@ module MovieDB
86
85
  end
87
86
 
88
87
  def spreadsheet_body_text_data(header_title)
89
- @e_t = element_title = MovieDB::Movie.instance_eval{filter_movie_attr(header_title)}.flatten
88
+ @e_t = element_title = MovieDB::Movie.instance_eval { filter_movie_attr(header_title) }.flatten
90
89
 
91
90
  element_title.each_with_index do |element2, i|
92
91
  element_array = element_title[(i)].split(' ',)
93
- @sheet.row(1 + i).concat element_array
92
+ @sheet.row(1 + i).concat element_array
94
93
  end
95
94
  end
96
95
 
97
96
  def spreadsheet_body_count_data(header_title)
98
- element_cast = MovieDB::Movie.instance_eval{filter_movie_attr(header_title)}
97
+ element_cast = MovieDB::Movie.instance_eval { filter_movie_attr(header_title) }
99
98
 
100
- 0.upto(@e_t.length - 1) do |i|
101
- element_array = []
99
+ 0.upto(@e_t.length - 1) do |i|
100
+ element_array = []
102
101
 
103
102
  element_array << element_cast[i].length
104
103
  @sheet.row(1 + i).concat element_array
@@ -106,7 +105,7 @@ module MovieDB
106
105
  end
107
106
 
108
107
  def spreadsheet_body_numeric_data(header_title)
109
- @e_t = element_title = MovieDB::Movie.instance_eval{filter_movie_attr(header_title)}
108
+ @e_t = element_title = MovieDB::Movie.instance_eval { filter_movie_attr(header_title) }
110
109
 
111
110
  element_title.each_with_index do |element2, i|
112
111
  element_array = element_title[(i)]
@@ -5,6 +5,7 @@ module MovieDB
5
5
  PATH_AOV = MovieDB::DataAnalysis::AnalysisOfVariance::LeastSquares
6
6
  extend PATH_AOV::Statistic
7
7
  extend PATH_AOV::Coefficient_Of_Determination
8
+
8
9
  include PATH_AOV::Explained_Sum_Of_Squares
9
10
  include PATH_AOV::Fraction_Of_Variance_Unexplained
10
11
  include PATH_AOV::Gauss_Newton_Algorithm
@@ -26,4 +26,3 @@ Talk-Show
26
26
  Thriller
27
27
  War
28
28
  Western
29
-
@@ -1,20 +1,19 @@
1
1
  require 'rubygems'
2
2
  require 'time'
3
-
4
- ##
5
- # Create an actor instance and return the values for
6
- # actor = MovieDB::Actor.instance_eval{create_with_info("Brittany Murphy", "F", "1977-11-10", "2009-12-20")}
7
- # actor = MovieDB::Actor.instance_eval{create_with_info("George Clooney", "M", "1961-05-06", nil)}
8
- # # Name
9
- # # actor_name = actor.map(&:name) #=> ["Brittany Murphy"]
10
- #
11
- # # Alive?
12
- # # actor_name = actor.map(&:alive?) #=> [false, true]
13
- #
14
- # # Age
15
- # # actor_name = actor.map(&:age) #=> [32, 52]
16
- #
17
-
3
+ # Create an actor instance and return the values for the actor variable.
4
+ #
5
+ # actor = MovieDB::Actor.instance_eval{create_with_info("Brittany Murphy", "F", "1977-11-10", "2009-12-20")}
6
+ # actor = MovieDB::Actor.instance_eval{create_with_info("George Clooney", "M", "1961-05-06", nil)}
7
+
8
+ # Example to find the actor name:
9
+ #
10
+ # actor_name = actor.map(&:name) #=> ["Brittany Murphy"]
11
+ #
12
+ # Example to see if an actor is alive:
13
+ # actor_name = actor.map(&:alive?) #=> [false, true]
14
+ #
15
+ # Example to find an actor's age:
16
+ # actor_name = actor.map(&:age) #=> [32, 52]
18
17
  module MovieDB
19
18
  class Person
20
19
  attr_accessor :name, :gender, :birth_date, :death_date, :birthplace
@@ -49,27 +48,28 @@ module MovieDB
49
48
  person.gender = gender
50
49
  person.birth_date = birth_date
51
50
  person.death_date = death_date
51
+
52
52
  return @person_DS << person
53
53
  end
54
54
 
55
55
  def filter_person(attr)
56
56
  attr = attr.to_sym
57
- raise ArgumentError, "#{attr} can only be name or age" if !attr == :age && :name
57
+ raise ArgumentError "#{attr} can only be name or age" if !attr == :age && :name
58
+
58
59
  return @person_DS.select{|s| s.alive?}.map(&attr)
59
60
  end
60
61
 
61
- ##
62
- # Returns a random parameter integer between min to max,
63
- # rather than a float between min to max.(Ruby 2.0.0)
64
- #
65
-
62
+ # Returns a random parameter integer between min to max,
63
+ # rather than a float between min to max.(Ruby 2.0.0)
66
64
  def sample_attr(attr)
67
65
  randgen = Object.new
68
66
  attr_array = self.instance_eval{filter_person(attr)}
69
67
  attr_array.sample(random: randgen)
70
68
  end
71
69
  end
70
+
72
71
  private_class_method :create_with_info, :filter_person
72
+
73
73
  end
74
74
 
75
75
  class Actor < Person
@@ -85,7 +85,8 @@ module MovieDB
85
85
  end
86
86
 
87
87
  def actor_actress_gender(person)
88
- case when person.gender == 'F'
88
+ case
89
+ when person.gender == 'F'
89
90
  return "actress"
90
91
  when person.gender == "M"
91
92
  return "actor"
@@ -95,23 +96,22 @@ module MovieDB
95
96
  end
96
97
 
97
98
  class << self
98
-
99
99
  def filter_actor_alive(attr)
100
100
  attr = attr.to_sym
101
- raise ArgumentError, "#{attr} can only be name or age" if !attr == :age && :name
101
+ raise ArgumentError "#{attr} can only be name or age" if !attr == :age && :name
102
+
102
103
  return @person_DS.select{|s| s.alive?}.map(&"#{attr.to_sym}")
103
104
  end
104
105
 
105
106
  def filter_actor_deceased(actor)
106
- return @person_DS.select{|s| !s.alive?}.map{|m| "#{m.age}"} if attr == "age"
107
- return @person_DS.select{|s| !s.alive?}.map{|m| "#{m.name}"} if attr == "name"
107
+ return @person_DS.select{ |s| !s.alive?}.map{ |m| "#{m.age}" } if attr == "age"
108
+ return @person_DS.select{ |s| !s.alive?}.map{ |m| "#{m.name}" } if attr == "name"
108
109
  end
109
110
 
110
111
  end
111
112
  end
112
113
 
113
114
  class Writer < Person
114
-
115
115
  attr_accessor :published_work
116
116
  alias :published? :published_work
117
117
 
@@ -123,7 +123,6 @@ module MovieDB
123
123
  end
124
124
 
125
125
  class Director < Person
126
-
127
126
  attr_accessor :filmography
128
127
 
129
128
  def initialize(filmography = [])