RubyGems - movieDB - Versions diffs - 0.2.2 → 0.2.4 - Mend

movieDB 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

checksums.yaml +15 -0
data/.gitignore +1 -0
data/.travis.yml +30 -0
data/README.md +80 -74
data/lib/movieDB.rb +225 -268
data/lib/movieDB/base.rb +4 -5
data/lib/movieDB/data_analysis.rb +59 -89
data/lib/movieDB/data_export.rb +20 -21
data/lib/movieDB/data_process.rb +1 -0
data/lib/movieDB/genres/en.txt +0 -1
data/lib/movieDB/person.rb +27 -28
data/lib/movieDB/status_checker.rb +30 -32
data/lib/movieDB/version.rb +1 -1
data/movieDB.gemspec +1 -1
data/spec/data_process_spec.rb +1 -1
data/spec/movieDB_spec.rb +5 -7
data/spec/person_spec.rb +14 -14
metadata +6 -28
data/npm-debug.log +0 -0

data/lib/movieDB/base.rb CHANGED Viewed

@@ -3,14 +3,13 @@ require 'MovieDB/status_checker'
 require 'MovieDB/movie_error'
 module MovieDB #:nodoc
-# MoviesDB v0.1.x is not a datastore gem. Rather, it is a high-level statistical software that performs
-# mathematical computations for analyzing film data from imdb.
-# In a nut shell, it is a solution to the common problem of deducing logical hypothesis based off data sets.
+# MoviesDB is not a datastore gem. Rather, it is a high-level statistical software that performs
+# mathematical computations for analyzing film data from imdb.
+# It is a solution to the common problem of deducing logical hypotheses from movie data.
   class Base
     include StatusChecker
     include MovieError
   end
 end
 $:.unshift File.expand_path('..', __FILE__)

data/lib/movieDB/data_analysis.rb CHANGED Viewed

@@ -1,18 +1,15 @@
 require 'MovieDB'
 module MovieDB
-  ##
-  # Analysing, inspecting, cleaning, transforming and modeling data.
+  # Analyzing, inspecting, cleaning, transforming and modeling data.
+  #
   class DataAnalysis < MovieDB::Movie
     module AnalysisOfVariance
       module LeastSquares
         module Statistic
           def basic_statistic (directory_name)
             open_spreadsheet(directory_name)
             if check_imdb_count == true
                puts "*"*41
                puts "* A minimum of 2 Imdb id's are required *"
@@ -29,76 +26,58 @@ module MovieDB
             @book = Spreadsheet.open File.join('reports', directory_name)
             @sheet = @book.worksheet(0)
-            ##
-            # Add document formatting
+            title_format = Spreadsheet::Format.new :color => :blue, :weight => :bold, :size => 13
-            title_format = Spreadsheet::Format.new :color => :blue,
-                                 :weight => :bold,
-                                 :size => 13
-           @sheet.column(22).width = "worldwide_gross".length
+            @sheet.column(22).width = "worldwide_gross".length
           end
           def check_imdb_count
             @sheet.rows.count - 1 == 1
           end
-          def perform_computation
+          def perform_computation
+          # Perform computation on the data collected.
+          # TODO: Need to use the coefficient statistical formula.
+          # The median is calculated as an example, but the COD formula must be used.
+          # The mean is commonly called the average. The mean (or average) is defined as the sum of
+          # all the given elements divided by the total number of elements.
+          #
+          # Range is the difference between the highest and the lowest values in a
+          # frequency distribution.
+          #
+          # Mode is the most frequently occurring value in a frequency distribution.
+          #
+          # Calculate Standard Deviation.
+          # Standard deviation is a statistical measure of spread or variability.
+          #
+          # The standard deviation is the root mean square (RMS) deviation of the
+          # values from their arithmetic mean.
             total_columns = 22
             @column = []
-            @row_count = @sheet.rows.count
-            ##
-            # Use this total column count to make it dynamic
-            #total_columns = @column_count = @sheet.column_count
+            @row_count = @sheet.rows.count
             1.upto(total_columns) do |c|
-              @column = [] # set instance variable to an empty array
-              ##
-              # loop through to collect all elements
-              # The returned array includes both strings and integers elements
+              @column = []
               @sheet.each_with_index do |row, i|
                 @column << @sheet[i, 0 + c ]
               end
-              @column.shift # delete the string header from the array
-              @column.compact! # delete nil from the array
-              row_count = @sheet.rows.count
-              ##
-              # Perform computation on the data collected
-              # TODO: Need to use coefficienct statistical formula
-              # Calculate median as an example but COD formula must be used
+              @column.shift
+              @column.compact!
+              row_count = @sheet.rows.count
-              if @column.all? {|i| (1..99999999999).include? (i)}
+              if @column.all? { |i| (1..99999999999).include? (i) }
                 n = @column.count
                     @column.sort!
-                ##
-                # Mean is commonly called as average.Mean or Average is defined as the sum of
-                # all the given elements divided by the total number of elements.
-                #
-                # Range is the difference between the highest and the lowest values in a
-                # frequency distribution.
-                #
-                # Mode is the most frequently occurring value in a frequency distribution.
-                @mean = @column.sum/n # Find the mean
-                @range = @column.max - @column.min # Find the range
+                @mean = @column.sum / n
+                @range = @column.max - @column.min
                 freq = @column.inject(Hash.new(0)) { |h, v| h[v] += 1; h }
-                @mode =  @column.sort_by { |v| freq[v]}.last # Find the mode
-                ##
-                # Calculate Standard Deviation
-                # Standard deviation is a statistical measure of spread or variability.
-                # The standard deviation is the root mean square (RMS) deviation of the
-                # values from their arithmetic mean.
+                @mode =  @column.sort_by { |v| freq[v] }.last # Find the mode
                 @column_squared = []
                 @column.each do |col|
@@ -107,15 +86,15 @@ module MovieDB
                 @sum_of_column = @column.sum
                 @sum_of_column_squared = @column_squared.sum
-                @standard_dev = Math.sqrt((@sum_of_column_squared -((@sum_of_column)*(@sum_of_column)/n))/(n-1))
+                @standard_dev = Math.sqrt((@sum_of_column_squared - ((@sum_of_column) * (@sum_of_column) / n)) / (n - 1))
                  if n.odd?
-                   index = (n + 1)/2
-                   @median = @column[index - 1] # Subtract -1 to reduce index value since array start with an index 0.
+                   index = (n + 1) / 2
+                   @median = @column[index - 1]
                  else
-                   middle_index = n/2
+                   middle_index = n / 2
                    right_index = middle_index + 1
-                   @median = (@column[middle_index - 1] + @column[right_index - 1])/2
+                   @median = (@column[middle_index - 1] + @column[right_index - 1]) / 2
                  end
               else
@@ -126,9 +105,6 @@ module MovieDB
                 @standard_dev = "N/A"
               end
-              ##
-              # Insert results into spreadsheet cell
               @sheet[@row_count + 2, 0 ] =  "Mean"
               @sheet[@row_count + 2, 0 + c ] = @mean
@@ -143,7 +119,6 @@ module MovieDB
               @sheet[@row_count + 6, 0 ] =  "Standard Deviation"
               @sheet[@row_count + 6, 0 + c ] =  @standard_dev
             end
           end
@@ -155,13 +130,16 @@ module MovieDB
           end
           def insert_data_to_existing_xls_file
             filename = ("#{report_name}.xls")
             @book.write File.join('reports', filename)
             return filename
           end
         end
-        module Coefficient_Of_Determination; end
+        module Coefficient_Of_Determination
+         # TODO: Add code.
+        end
         module Discrete_Least_Squares_Meshless_Method; end
         module Explained_Sum_Of_Squares; end
         module Fraction_Of_Variance_Unexplained; end
@@ -192,12 +170,10 @@ module MovieDB
       module Variable_Kernel_Density_Estimation; end
     end
-    ##
-    # primarily EDA is for seeing what the data can
-    # tell us beyond the formal modeling or hypothesis testing task
-    # The output will be a visual material
     module ExploratoryDataAnalysis
+    # EDA is primarily for seeing what the data can
+    # tell us beyond the formal modeling or hypothesis-testing task.
+    # The output will be visual material.
       module Data_Reduction; end
       module Table_Diagonalization; end
       module Configural_Frequency_Analysis; end
@@ -216,12 +192,12 @@ module MovieDB
     module RegressionAnalysis
       module Choice_Modelling; end
-      module Generalized_Linear_Model
-        module Binomial_Regression; end
-        module Generalized_Additive_Model; end
-        module Linear_Probability_Model; end
-        module Poisson_Regression; end
-        module Zero_Inflated_Model; end
+      module Generalized_Linear_Model
+        module Binomial_Regression; end
+        module Generalized_Additive_Model; end
+        module Linear_Probability_Model; end
+        module Poisson_Regression; end
+        module Zero_Inflated_Model; end
       end
       module Nonparametric_Regression; end
@@ -251,23 +227,19 @@ module MovieDB
     end
   end
-  ##
-  #TODO: All Mathetical Calculations go here.
   class ExportData
     def write_spreadsheet (data, data_analysis_name)
       begin data_analysis.is_a? String
         @data_analysis_name = data_analysis_name.split.join.gsub('_', ' ').downcase.to_s
         case data_analysis_name
-          when "coefficient of determination"
-            write_coefficient_of_determination
-          when  "discrete least squares meshless method"
-            write_discrete_least_squares_meshless_method
-          when "discrete least squares meshless method"
-            write_discrete_least_squares_meshless_method
-          else
-          end
+        when "coefficient of determination"
+          write_coefficient_of_determination
+        when  "discrete least squares meshless method"
+          write_discrete_least_squares_meshless_method
+        when "discrete least squares meshless method"
+          write_discrete_least_squares_meshless_method
+        else
+        end
       rescue
         raise ArgumentError, 'invalid attribute'
       end
@@ -275,12 +247,10 @@ module MovieDB
     def write_coefficient_of_determination
       book = Spreadsheet::Workbook.new
       sheet1 = book.create_worksheet name: "Data Analysis: #{@data_analysis_name}"
       sheet1.row(0).concat %w{title released_date worldwide_gross}
-      # Loop through the data to collect  all values.
-      # Then values into array
       data.each_with_index do |value, index|
         sheet1[1, index] = "#{value}"
       end

data/lib/movieDB/data_export.rb CHANGED Viewed

@@ -1,13 +1,14 @@
 require "spreadsheet"
 require "MovieDB"
   # This module will write xls document to file
   #
-  # Usage @book = Spreadsheet::Workbook.new
+  # Usage
+  #
+  #    @book = Spreadsheet::Workbook.new
 module MovieDB
   class DataExport < MovieDB::Movie
-    class  << self
+    class  << self
       #TODO: Check the data analysis(DA) name. Write a define_method and include the DA.
       def export_movie_data
@@ -17,12 +18,11 @@ module MovieDB
       end
       def create_spreadsheet_file
-        directory_name = ('reports')
+        directory_name = 'reports'
         create_directory(directory_name)
         Spreadsheet.client_encoding = 'UTF-8'
         @book = Spreadsheet::Workbook.new
-        @sheet = @book.create_worksheet name: "Data Analysis: #{$DATA_ANALYSIS_NAME}" # the analysis nameshould be an input
+        @sheet = @book.create_worksheet name: "Data Analysis: #{$DATA_ANALYSIS_NAME}" # the analysis name should be an input
       end
       def create_directory(directory_name)
@@ -37,26 +37,25 @@ module MovieDB
       def create_spreadsheet_header
         @sheet.row(0).concat $IMDB_ATTRIBUTES_HEADERS
-        title_format = Spreadsheet::Format.new :color => :blue,
-                                         :weight => :bold,
-                                         :size => 13
+        title_format = Spreadsheet::Format.new :color => :blue, :weight => :bold, :size => 13
         float_format = Spreadsheet::Format.new :number_format => "0.00"
         @sheet.row(0).default_format = title_format
         @sheet.column(1).default_format = float_format
         @sheet.column(16).default_format = float_format
         @sheet.column(22).default_format = float_format
       end
-      # Loop through array of and array imbd data. Each row has the
+      # Loop through an array of arrays of IMDb data. Each row has
       # the information about the film/movie
       # The Data is obtained from MovieDB::Movie
       # example
-      # catching fire |
+      #
+      #   Film: catching fire
       def create_spreadsheet_body
-       $IMDB_ATTRIBUTES_HEADERS.each do |header|
-        case header
+        $IMDB_ATTRIBUTES_HEADERS.each do |header|
+          case header
           when 'title' then spreadsheet_body_text_data("title")
           when 'cast_members' then spreadsheet_body_count_data("cast_members")
           when 'cast_characters' then spreadsheet_body_count_data("cast_characters")
@@ -86,19 +85,19 @@ module MovieDB
       end
       def spreadsheet_body_text_data(header_title)
-        @e_t = element_title = MovieDB::Movie.instance_eval{filter_movie_attr(header_title)}.flatten
+        @e_t = element_title = MovieDB::Movie.instance_eval { filter_movie_attr(header_title) }.flatten
         element_title.each_with_index do |element2, i|
           element_array = element_title[(i)].split('   ',)
-          @sheet.row(1 + i).concat element_array
+          @sheet.row(1 + i).concat element_array
         end
       end
       def spreadsheet_body_count_data(header_title)
-         element_cast = MovieDB::Movie.instance_eval{filter_movie_attr(header_title)}
+        element_cast = MovieDB::Movie.instance_eval { filter_movie_attr(header_title) }
-         0.upto(@e_t.length - 1) do |i|
-           element_array = []
+        0.upto(@e_t.length - 1) do |i|
+          element_array = []
           element_array << element_cast[i].length
           @sheet.row(1 + i).concat element_array
@@ -106,7 +105,7 @@ module MovieDB
       end
       def spreadsheet_body_numeric_data(header_title)
-        @e_t = element_title = MovieDB::Movie.instance_eval{filter_movie_attr(header_title)}
+        @e_t = element_title = MovieDB::Movie.instance_eval { filter_movie_attr(header_title) }
         element_title.each_with_index do |element2, i|
           element_array = element_title[(i)]

data/lib/movieDB/data_process.rb CHANGED Viewed

@@ -5,6 +5,7 @@ module MovieDB
     PATH_AOV = MovieDB::DataAnalysis::AnalysisOfVariance::LeastSquares
     extend PATH_AOV::Statistic
     extend PATH_AOV::Coefficient_Of_Determination
     include PATH_AOV::Explained_Sum_Of_Squares
     include PATH_AOV::Fraction_Of_Variance_Unexplained
     include PATH_AOV::Gauss_Newton_Algorithm

data/lib/movieDB/genres/en.txt CHANGED Viewed

@@ -26,4 +26,3 @@ Talk-Show
 Thriller
 War
 Western

data/lib/movieDB/person.rb CHANGED Viewed

@@ -1,20 +1,19 @@
 require 'rubygems'
 require 'time'
-  ##
-  # Create an actor instance and return the values  for
-  # actor = MovieDB::Actor.instance_eval{create_with_info("Brittany Murphy", "F", "1977-11-10", "2009-12-20")}
-  # actor = MovieDB::Actor.instance_eval{create_with_info("George Clooney", "M", "1961-05-06", nil)}
-  #   # Name
-  #   # actor_name = actor.map(&:name) #=> ["Brittany Murphy"]
-  #
-  #   # Alive?
-  #   # actor_name = actor.map(&:alive?) #=> [false, true]
-  #
-  #   # Age
-  #   # actor_name = actor.map(&:age) #=> [32, 52]
-  #
+# Create an actor instance and return the values for the actor variable.
+#
+#    actor = MovieDB::Actor.instance_eval{create_with_info("Brittany Murphy", "F", "1977-11-10", "2009-12-20")}
+#    actor = MovieDB::Actor.instance_eval{create_with_info("George Clooney", "M", "1961-05-06", nil)}
+#  Example to find the actor name:
+#
+#    actor_name = actor.map(&:name) #=> ["Brittany Murphy"]
+#
+#  Example to see if an actor is alive:
+#    actor_name = actor.map(&:alive?) #=> [false, true]
+#
+#   Example to find an actor's age:
+#     actor_name = actor.map(&:age) #=> [32, 52]
 module MovieDB
   class Person
     attr_accessor :name, :gender, :birth_date, :death_date, :birthplace
@@ -49,27 +48,28 @@ module MovieDB
         person.gender = gender
         person.birth_date = birth_date
         person.death_date = death_date
         return @person_DS << person
       end
       def filter_person(attr)
        attr = attr.to_sym
-       raise ArgumentError, "#{attr} can only be name or age" if !attr == :age && :name
+       raise ArgumentError "#{attr} can only be name or age" if !attr == :age && :name
        return @person_DS.select{|s| s.alive?}.map(&attr)
       end
-  ##
-  # Returns a random parameter integer between min to max,
-  # rather than a float between min to max.(Ruby 2.0.0)
-  #
+      # Returns a random parameter integer between min to max,
+      # rather than a float between min to max.(Ruby 2.0.0)
       def sample_attr(attr)
         randgen = Object.new
         attr_array = self.instance_eval{filter_person(attr)}
         attr_array.sample(random: randgen)
       end
     end
     private_class_method :create_with_info, :filter_person
   end
   class Actor < Person
@@ -85,7 +85,8 @@ module MovieDB
     end
     def actor_actress_gender(person)
-      case when person.gender == 'F'
+      case
+      when person.gender == 'F'
         return "actress"
       when person.gender == "M"
         return "actor"
@@ -95,23 +96,22 @@ module MovieDB
     end
     class << self
       def filter_actor_alive(attr)
        attr = attr.to_sym
-       raise ArgumentError, "#{attr} can only be name or age" if !attr == :age && :name
+       raise ArgumentError "#{attr} can only be name or age" if !attr == :age && :name
        return @person_DS.select{|s| s.alive?}.map(&"#{attr.to_sym}")
       end
       def filter_actor_deceased(actor)
-       return @person_DS.select{|s| !s.alive?}.map{|m| "#{m.age}"} if attr == "age"
-       return @person_DS.select{|s| !s.alive?}.map{|m| "#{m.name}"} if attr == "name"
+       return @person_DS.select{ |s| !s.alive?}.map{ |m| "#{m.age}" } if attr == "age"
+       return @person_DS.select{ |s| !s.alive?}.map{ |m| "#{m.name}" } if attr == "name"
       end
     end
   end
   class Writer < Person
     attr_accessor :published_work
     alias :published? :published_work
@@ -123,7 +123,6 @@ module MovieDB
   end
   class Director < Person
     attr_accessor :filmography
     def initialize(filmography = [])