RubyGems - movieDB - Versions diffs - 0.3.4 → 1.0.0 - Mend

movieDB 0.3.4 → 1.0.0

Files changed (31) hide show

checksums.yaml +4 -4
data/.DS_Store +0 -0
data/.coveralls.yml +2 -0
data/.gitignore +3 -1
data/.rspec +2 -0
data/.travis.yml +5 -0
data/Gemfile +8 -3
data/README.md +250 -103
data/Rakefile +3 -0
data/lib/movieDB.rb +22 -141
data/lib/movieDB/base.rb +3 -10
data/lib/movieDB/data_analysis/statistics.rb +85 -0
data/lib/movieDB/data_store.rb +83 -0
data/lib/movieDB/relation/query_methods.rb +139 -0
data/lib/movieDB/secret.rb +2 -6
data/lib/movieDB/support/reporting.rb +19 -0
data/lib/movieDB/version.rb +1 -1
data/movieDB.gemspec +6 -6
data/spec/movieDB/data_analysis/statistics_spec.rb +105 -0
data/spec/movieDB/data_store_spec.rb +31 -0
data/spec/movieDB/relation/query_methods_spec.rb +71 -0
data/spec/movieDB/support/reporting_spec.rb +12 -0
data/spec/movieDB_spec.rb +24 -0
data/spec/spec_helper.rb +29 -0
metadata +33 -23
data/lib/movieDB/data_analysis.rb +0 -263
data/lib/movieDB/data_export.rb +0 -96
data/lib/movieDB/data_process.rb +0 -26
data/lib/movieDB/movie_error.rb +0 -20
data/lib/movieDB/status_checker.rb +0 -48
data/test/unit/test_movie_db.rb +0 -97

data/lib/movieDB/data_analysis.rb DELETED

@@ -1,263 +0,0 @@
-require 'MovieDB'
-module MovieDB
-  # Analyzing, inspecting, cleaning, transforming and modeling data.
-  #
-  class DataAnalysis < MovieDB::Movie
-    module AnalysisOfVariance
-      module LeastSquares
-        module Statistic
-          def basic_statistic(directory_name)
-            open_spreadsheet(directory_name)
-            @directory_name = directory_name
-            if check_imdb_count == true
-               puts "*"*41
-               puts "* A minimum of 2 Imdb id's are required *"
-               puts "* To perform statistical data analysis  *"
-               puts "* You only have ONE Imdb id entered     *"
-               puts "*"*41
-            else
-              perform_computation
-              insert_data_to_existing_xls_file
-            end
-          end
-          def open_spreadsheet(directory_name)
-            @book = Spreadsheet.open File.join('reports', directory_name)
-            @sheet = @book.worksheet(0)
-            title_format = Spreadsheet::Format.new :color => :blue, :weight => :bold, :size => 13
-            @sheet.column(22).width = "worldwide_gross".length
-          end
-          def check_imdb_count
-            @sheet.rows.count - 1 == 1
-          end
-          def perform_computation
-          # Perform computation on the data collected.
-          #
-          # TODO: Need to use coefficient statistical formula.
-          #
-          # Calculate median as an example but COD formula must be used.
-          # The mean is commonly called the average. It is defined as the sum of
-          # all the given elements divided by the total number of elements.
-          #
-          # Range is the difference between the highest and the lowest values in a
-          # frequency distribution.
-          #
-          # Mode is the most frequently occurring value in a frequency distribution.
-          #
-          # Calculate Standard Deviation.
-          # Standard deviation is a statistical measure of spread or variability.
-          #
-          # The standard deviation is the root mean square (RMS) deviation of the
-          # values from their arithmetic mean.
-            total_columns = 22
-            @column = []
-            @row_count = @sheet.rows.count
-            1.upto(total_columns) do |c|
-              @column = []
-              @sheet.each_with_index do |row, i|
-                @column << @sheet[i, 0 + c ]
-              end
-              @column.shift
-              @column.compact!
-              row_count = @sheet.rows.count
-              if @column.all? { |i| (1..99999999999).include? (i) }
-                n = @column.count
-                    @column.sort!
-                @mean = @column.sum / n
-                @range = @column.max - @column.min
-                freq = @column.inject(Hash.new(0)) { |h, v| h[v] += 1; h }
-                @mode =  @column.sort_by { |v| freq[v] }.last # Find the mode
-                @column_squared = []
-                @column.each do |col|
-                  @column_squared << col**2
-                end
-                @sum_of_column = @column.sum
-                @sum_of_column_squared = @column_squared.sum
-                @standard_dev = Math.sqrt((@sum_of_column_squared - ((@sum_of_column) * (@sum_of_column) / n)) / (n - 1))
-                 if n.odd?
-                   index = (n + 1) / 2
-                   @median = @column[index - 1]
-                 else
-                   middle_index = n / 2
-                   right_index = middle_index + 1
-                   @median = (@column[middle_index - 1] + @column[right_index - 1]) / 2
-                 end
-              else
-                @median = "N/A"
-                @mean = "N/A"
-                @range = "N/A"
-                @mode = "N/A"
-                @standard_dev = "N/A"
-              end
-              @sheet[@row_count + 2, 0 ] =  "Mean"
-              @sheet[@row_count + 2, 0 + c ] = @mean
-              @sheet[@row_count + 3, 0 ] =  "Median"
-              @sheet[@row_count + 3, 0 + c ] =  @median
-              @sheet[@row_count + 4, 0 ] =  "Range"
-              @sheet[@row_count + 4, 0 + c ] =  @range
-              @sheet[@row_count + 5, 0 ] =  "Mode"
-              @sheet[@row_count + 5, 0 + c ] =  @mode
-              @sheet[@row_count + 6, 0 ] =  "Standard Deviation"
-              @sheet[@row_count + 6, 0 + c ] =  @standard_dev
-            end
-          end
-          def report_name
-            module_nesting = Module.nesting[0].to_s.gsub('::', ' ').split()
-            count = module_nesting.size
-            @data_analysis_name = module_nesting[count - 1]
-            @data_analysis_name << '_' <<  @directory_name.gsub('_.xls', '')
-          end
-          def insert_data_to_existing_xls_file
-            filename = ("#{report_name}.xls")
-            @book.write File.join('reports', filename)
-            return filename
-          end
-        end
-        module Coefficient_Of_Determination
-         # TODO: Add code.
-        end
-        module Discrete_Least_Squares_Meshless_Method; end
-        module Explained_Sum_Of_Squares; end
-        module Fraction_Of_Variance_Unexplained; end
-        module Gauss_Newton_Algorithm; end
-        module Iteratively_Reweighted_Least_Squares; end
-        module Lack_Of_Fit_Sum_Of_Squares; end
-        module Least_Squares_Support_Vector_Machine; end
-        module Mean_Squared_Error; end
-        module Moving_Least_Sqares; end
-        module Non_Linear_Iterative_Partial_Least_Squares; end
-        module Non_Linear_Least_Squares; end
-        module Ordinary_Least_Squares; end
-        module Partial_Least_Squares_Regression; end
-        module Partition_Of_Sums_Of_Squares; end
-        module Proofs_Involving_Ordinary_Least_Squares; end
-        module Residual_Sum_Of_Squares; end
-        module Total_Least_Squares; end
-        module Total_Sum_Of_Squares; end
-      end
-    end
-    module EstimationOfDensity
-      module Cluster_Weighted_Modeling; end
-      module Density_Estimation; end
-      module Discretization_Of_Continuous_Features; end
-      module Mean_Integrated_Squared_Error; end
-      module Multivariate_Kernel_Density_Estimation; end
-      module Variable_Kernel_Density_Estimation; end
-    end
-    module ExploratoryDataAnalysis
-    # EDA is primarily for seeing what the data can
-    # tell us beyond the formal modeling or hypothesis testing task.
-    # The output will be visual material.
-      module Data_Reduction; end
-      module Table_Diagonalization; end
-      module Configural_Frequency_Analysis; end
-      module Median_Polish; end
-      module Stem_And_Leaf_Display; end
-    end
-    module Data_Mining
-      module Applied_DataMining; end
-      module Cluster_Analysis; end
-      module Dimension_Reduction; end
-      module Applied_DataMining; end
-    end
-    module RegressionAnalysis
-      module Choice_Modelling; end
-      module Generalized_Linear_Model
-        module Binomial_Regression; end
-        module Generalized_Additive_Model; end
-        module Linear_Probability_Model; end
-        module Poisson_Regression; end
-        module Zero_Inflated_Model; end
-      end
-      module Nonparametric_Regression; end
-      module Statistical_Outliers; end
-      module Regression_And_Curve_Fitting_Software; end
-      module Regression_Diagnostics; end
-      module Regression_Variable_Selection; end
-      module Regression_With_Time_Series_Structure; end
-      module Robust_Regression; end
-      module Choice_Modeling; end
-    end
-    module Resampling
-      module Bootstrapping_Population; end
-    end
-    module Sensitivity_Analysis
-      module Variance_Based_Sensitivity_Analysis; end
-      module Elementary_Effects_Method; end
-      module Experimental_Uncertainty_Analysis; end
-      module Fourier_Amplitude_Sensitivity_Testing; end
-      module Hyperparameter; end
-    end
-    module Time_series_Analysis
-      module Frequency_Deviation; end
-    end
-  end
-  class ExportData
-    def write_spreadsheet(data, data_analysis_name)
-      begin data_analysis.is_a? String
-        @data_analysis_name = data_analysis_name.split.join.gsub('_', ' ').downcase.to_s
-        case data_analysis_name
-        when "coefficient of determination"
-          write_coefficient_of_determination
-        when  "discrete least squares meshless method"
-          write_discrete_least_squares_meshless_method
-        when "discrete least squares meshless method"
-          write_discrete_least_squares_meshless_method
-        else
-        end
-      rescue
-        raise ArgumentError, 'invalid attribute'
-      end
-    end
-    def write_coefficient_of_determination
-      book = Spreadsheet::Workbook.new
-      sheet1 = book.create_worksheet name: "Data Analysis: #{@data_analysis_name}"
-      sheet1.row(0).concat %w{title released_date worldwide_gross}
-      data.each_with_index do |value, index|
-        sheet1[1, index] = "#{value}"
-      end
-    end
-  end
-end

data/lib/movieDB/data_export.rb DELETED

@@ -1,96 +0,0 @@
-require "spreadsheet"
-require "redis"
-require "json"
-# Movie data fetched from IMDb is stored as a hash data type in redis.
-# The key and values are written into a spreadsheet for later data analysis.
-module MovieDB
-  module DataExport
-    IMDB_ATTRIBUTES_HEADERS = %w(title cast_members cast_characters cast_member_ids cast_members_characters
-                    trailer_url director writers filming_locations company genres languages countries
-                    length plot poster rating votes mpaa_rating tagline year release_date revenue)
-    def export_movie_data(db_redis, imdb_ids)
-      @db_redis = db_redis
-      @imdb_ids = imdb_ids
-      create_spreadsheet_file
-      create_spreadsheet_report
-      write_xls_file
-    end
-    def create_spreadsheet_file
-      directory_name = 'reports'
-      create_directory(directory_name)
-      Spreadsheet.client_encoding = 'UTF-8'
-      @book = Spreadsheet::Workbook.new
-      @sheet = @book.create_worksheet
-      @sheet.name = report_name if @db_redis
-      @sheet.name = "Data Analysis: #{$DATA_ANALYSIS_NAME}" if $DATA_ANALYSIS_NAME
-    end
-    def create_directory(directory_name)
-      Dir.mkdir(directory_name) unless File.exists? directory_name
-    end
-    def create_spreadsheet_report
-      create_spreadsheet_header
-      create_spreadsheet_body
-    end
-    def create_spreadsheet_header
-      @sheet.row(0).concat MovieDB::DataExport::IMDB_ATTRIBUTES_HEADERS
-      title_format = Spreadsheet::Format.new :color => :blue, :weight => :bold, :size => 13
-      float_format = Spreadsheet::Format.new :number_format => "0.00"
-      @sheet.row(0).default_format = title_format
-      @sheet.column(1).default_format = float_format
-      @sheet.column(16).default_format = float_format
-      @sheet.column(22).default_format = float_format
-    end
-    # Write all the keys and values from our data set to the spreadsheet.
-    def create_spreadsheet_body
-      @imdb_ids.each_with_index do |imdb_id, idx|
-        row = @sheet.row(idx + 1)
-        MovieDB::DataExport::IMDB_ATTRIBUTES_HEADERS.each do |attr_key|
-          string_values = ['title', 'language', 'length', 'rating', 'vote', 'release', 'mpaa_rating', 'year', 'revenue']
-          # Check whether the value fetched from Redis is JSON; if parsing fails, wrap the raw value in an array.
-          begin
-            movie_value = JSON.parse(@db_redis.hget "movie:#{imdb_id}", "#{attr_key}")
-          rescue => e
-            movie_value = [] << (@db_redis.hget "movie:#{imdb_id}", "#{attr_key}")
-          end
-          row.push(movie_value.map { |t| t }.join(' ')) if ([].unshift attr_key).any? { |v| string_values.include?(v) }
-          row.push movie_value.length if (movie_value.is_a? Array) && ([].unshift attr_key).any? { |v| !string_values.include?(v) }
-          row.push(movie_value) if movie_value.is_a? String
-        end
-      end
-    end
-    def report_name
-      name = "imdb_"
-      @imdb_ids.each do |imdb_id|
-        name << (@db_redis.hget "movie:#{imdb_id}", "title").gsub(' ', '')
-        name << '_' unless @imdb_ids.length == imdb_id
-      end
-      return name
-    end
-    def write_xls_file
-      filename = ("#{report_name}.xls")
-      @book.write File.join('reports', filename)
-      return filename
-    end
-  end
-end

data/lib/movieDB/data_process.rb DELETED

@@ -1,26 +0,0 @@
-require 'MovieDB/data_analysis'
-module MovieDB
-  class DataProcess
-    PATH_AOV = MovieDB::DataAnalysis::AnalysisOfVariance::LeastSquares
-    extend PATH_AOV::Statistic
-    extend PATH_AOV::Coefficient_Of_Determination
-    include PATH_AOV::Explained_Sum_Of_Squares
-    include PATH_AOV::Fraction_Of_Variance_Unexplained
-    include PATH_AOV::Gauss_Newton_Algorithm
-    include PATH_AOV::Iteratively_Reweighted_Least_Squares
-    include PATH_AOV::Lack_Of_Fit_Sum_Of_Squares
-    include PATH_AOV::Least_Squares_Support_Vector_Machine
-    include PATH_AOV::Mean_Squared_Error
-    include PATH_AOV::Non_Linear_Iterative_Partial_Least_Squares
-    include PATH_AOV::Non_Linear_Least_Squares
-    include PATH_AOV::Ordinary_Least_Squares
-    include PATH_AOV::Partial_Least_Squares_Regression
-    include PATH_AOV::Partition_Of_Sums_Of_Squares
-    include PATH_AOV::Residual_Sum_Of_Squares
-    include PATH_AOV::Total_Least_Squares
-    include PATH_AOV::Total_Sum_Of_Squares
-  end
-end

data/lib/movieDB/movie_error.rb DELETED

@@ -1,20 +0,0 @@
-  ##
-  # TODO: Reword the responses to be human-readable.
-module MovieDB
- module MovieError
-   def raise_errors(response)
-     case response.to_i
-       when 200
-        raise OK, "(#{response}: Successful )"
-       when 404
-         raise NotFound, "(#{response}: Resource Not found)"
-       when 500
-         raise Lookup, "(#{response}: Internal Server Error.)"
-       when 503
-         raise Unavailable, "(#{response}: Resource is Unavailable.)"
-       else
-     end
-   end
- end
-end

data/lib/movieDB/status_checker.rb DELETED

@@ -1,48 +0,0 @@
-module MovieDB
-  module StatusChecker
-  # Checks the film release and returns the release status.
-  #
-  # Example of checking for status:
-  #
-  #   movie = Movie.new(film_release: ['theatrical', 'print'])
-  #   movie.status_check
-    def self.included(base)
-      base.class_eval {
-        def theatrical_released?
-          self.movie_status == 'theartrical'
-        end
-        def video_released?
-          self.movie_status == 'video'
-        end
-        def television_released?
-          self.movie_status == 'television'
-        end
-        def internet_released?
-          self.movie_status == 'internet'
-        end
-        def print_released?
-          self.movie_status == 'print'
-        end
-        def status_check
-          case
-          when self.theatrical_released? && self.television_released? && self.video_released? && self.print_released?
-            "Wide Release"
-          when self.theatrical_released? && self.print_released?
-            "Modified Wide Release"
-          when self.theatrical_released? && (self.internet_released? || self.print_released?)
-            "Exclusive and Limited Runs"
-          when self.theatrical_released? || self.television_released? || self.video_released? || self.print_released?
-            "Territorial Saturation"
-          else
-            "Not Released"
-          end
-        end
-      }
-    end
-  end
-end