movieDB 0.1.10 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d50cca9ba4a8eab49599c8fd2dbe0ab856993d34
4
- data.tar.gz: 0f8f434ce0c78e6b4604cc326717b75373777818
3
+ metadata.gz: 4d6f86e45dd6df3973448eb08ac5e76258be9fe6
4
+ data.tar.gz: 740512f4b3c575ca884148584b24eaae6b62ea05
5
5
  SHA512:
6
- metadata.gz: aba42600c6385faa6383be3e23eae5fa604349de78c2d607d38d6ac34ceaf91239d14db5bb652f14c16c879973acba335617ef9e3e9537bb8e6b46af9e0ee713
7
- data.tar.gz: 7b2f064cd6dbc380a1f798923b6ba6428af2b4d161a3421c92eaab1d348a636d5419c17a75577a350fe075133f004f98bc7c456567bc22968c14f4ebdb3c53d6
6
+ metadata.gz: 8416c049afe77426e2fd6daf29b91e0f60528cf24309effd09d072c10330a9f1d5fa27bf8d59eee2029b4be805ccf25d5253c4205dad1c485c738fce253698c1
7
+ data.tar.gz: 23f5c489154e0c4ce12452056cd4a5a5465de77df80fa26a21e74bfb9eefc85bbfb09922f6bb728411737016bed2d44980ba93bdede2c7929daf782981ce0a79
data/README.md CHANGED
@@ -47,10 +47,99 @@ The exported movie data is stored in your reports directory.
47
47
 
48
48
  $ cd /reports/imdb_raw_data_20131216.xls
49
49
 
50
- ## Usage - Data Analysis
50
+ ## Usage - Analysing Data and Generating Stats
51
51
 
52
52
  $ irb
53
53
 
54
54
  > require 'MovieDB/data_analysis'
55
55
 
56
- > "work-in-progress"
56
+ > require 'MovieDB/data_process'
57
+
58
+ > MovieDB::DataProcess.send(:basic_statistic, 'imdb_raw_data_20131216.xls')
59
+
60
+ ## Exported - Analyzed Data
61
+
62
+ The exported analyzed data is stored in your reports directory.
63
+
64
+ $ cd /reports/basic_statistic_20131216.xls
65
+
66
+ ## What's Next
67
+
68
+ More statistical computations are coming, including:
69
+
70
+ > Gauss_Newton_Algorithm
71
+ > Iteratively_Reweighted_Least_Squares
72
+ > Lack_Of_Fit_Sum_Of_Squares
73
+ > Least_Squares_Support_Vector_Machine
74
+ > Mean_Squared_Error
75
+ > Moving_Least_Squares
76
+ > Non_Linear_Iterative_Partial_Least_Squares
77
+ > Non_Linear_Least_Squares
78
+ > Ordinary_Least_Squares
79
+ > Partial_Least_Squares_Regression
80
+ > Partition_Of_Sums_Of_Squares
81
+ > Proofs_Involving_Ordinary_Least_Squares
82
+ > Residual_Sum_Of_Squares
83
+ > Total_Least_Squares
84
+ > Total_Sum_Of_Squares
85
+
86
+ > EstimationOfDensity
87
+ > Cluster_Weighted_Modeling
88
+ > Density_Estimation
89
+ > Discretization_Of_Continuous_Features
90
+ > Mean_Integrated_Squared_Error
91
+ > Multivariate_Kernel_Density_Estimation
92
+ > Variable_Kernel_Density_Estimation
93
+
94
+ > ExploratoryDataAnalysis
95
+ > Data_Reduction
96
+ > Table_Diagonalization
97
+ > Configural_Frequency_Analysis
98
+ > Median_Polish
99
+ > Stem_And_Leaf_Display
100
+
101
+ > Data_Mining
102
+ > Applied_DataMining
103
+ > Cluster_Analysis
104
+ > Dimension_Reduction
105
+ > Applied_DataMining
106
+
107
+ > RegressionAnalysis
108
+ > Choice_Modelling
109
+
110
+ > Generalized_Linear_Model
111
+ > Binomial_Regression
112
+ > Generalized_Additive_Model
113
+ > Linear_Probability_Model
114
+ > Poisson_Regression
115
+ > Zero_Inflated_Model
116
+
117
+ > Nonparametric_Regression
118
+ > Statistical_Outliers
119
+ > Regression_And_Curve_Fitting_Software
120
+ > Regression_Diagnostics
121
+ > Regression_Variable_Selection
122
+ > Regression_With_Time_Series_Structure
123
+ > Robust_Regression
124
+ > Choice_Modeling
125
+
126
+ > Resampling
127
+ > Bootstrapping_Population
128
+
129
+ > Sensitivity_Analysis
130
+ > Variance_Based_Sensitivity_Analysis
131
+ > Elementary_Effects_Method
132
+ > Experimental_Uncertainty_Analysis
133
+ > Fourier_Amplitude_Sensitivity_Testing
134
+ > Hyperparameter
135
+
136
+ > Time_series_Analysis
137
+ > Frequency_Deviation
138
+
139
+ ## Contact me
140
+
141
+ If you'd like to collaborate, please feel free to fork the source code on GitHub.
142
+
143
+ You can also contact me at albertmck@gmail.com
144
+
145
+
@@ -8,8 +8,9 @@ module MovieDB
8
8
  class DataAnalysis < MovieDB::Movie
9
9
  module AnalysisOfVariance
10
10
  module LeastSquares
11
- module Coefficient_Of_Determination
12
- def coefficient_of_determination (directory_name)
11
+ module Statistic
12
+
13
+ def basic_statistic (directory_name)
13
14
  open_spreadsheet(directory_name)
14
15
  perform_computation
15
16
  insert_data_to_existing_xls_file
@@ -21,25 +22,102 @@ module MovieDB
21
22
  end
22
23
 
23
24
  def perform_computation
24
- @col_0 = []
25
25
 
26
- @sheet.each_with_index do |row, i|
27
- @col_0 << @sheet[i, 1]
28
- end
26
+ total_columns = 17
27
+ @column = []
28
+ @row_count = @sheet.rows.count
29
+
30
+ 1.upto(total_columns) do |c|
31
+ @column = [] # set instance variable to an empty array
32
+
33
+ ##
34
+ # loop through to collect all elements
35
+ # The returned array includes both strings and integers elements
36
+
37
+ @sheet.each_with_index do |row, i|
38
+ @column << @sheet[i, 0 + c ]
39
+ end
40
+
41
+ @column.shift # delete the string header from the array
42
+ @column.compact! # delete nil from the array
43
+ row_count = @sheet.rows.count
44
+
45
+ ##
46
+ # Perform computation on the data collected
47
+ # TODO: Need to use coefficient statistical formula
48
+ # Calculate median as an example but COD formula must be used
49
+
50
+
51
+ if @column.all? {|i| (1..99999999).include? (i)}
52
+
53
+ n = @column.count
54
+ @column.sort!
55
+
56
+ ##
57
+ # The mean is commonly called the average. It is defined as the sum of
58
+ # all the given elements divided by the total number of elements.
59
+ #
60
+ # Range is the difference between the highest and the lowest values in a
61
+ # frequency distribution.
62
+ #
63
+ # Mode is the most frequently occurring value in a frequency distribution.
29
64
 
30
- ##
31
- # Perform computation on the data collected
32
- # TODO: Need to use coefficienct statistical formula
33
- # Calculate median as an example but COD formula must be used
65
+ @mean = @column.sum/n # Find the mean
66
+ @range = @column.max - @column.min # Find the range
34
67
 
35
- drop_header = @col_0.shift
68
+ freq = @column.inject(Hash.new(0)) { |h, v| h[v] += 1; h }
69
+ @mode = @column.sort_by { |v| freq[v]}.last # Find the mode
36
70
 
37
- row_count = @sheet.rows.count
38
- column_count = @sheet.columns[0].count
71
+ ##
72
+ # Calculate Standard Deviation
73
+ # Standard deviation is a statistical measure of spread or variability.
74
+ # The standard deviation is the root mean square (RMS) deviation of the
75
+ # values from their arithmetic mean.
39
76
 
40
- @data_processing = @col_0.inject do |sum, n|
41
- (sum + n)/(row_count-1)
42
- end
77
+ @column_squared = []
78
+ @column.each do |col|
79
+ @column_squared << col**2
80
+ end
81
+
82
+ @sum_of_column = @column.sum
83
+ @sum_of_column_squared = @column_squared.sum
84
+ @standard_dev = Math.sqrt((@sum_of_column_squared -((@sum_of_column)*(@sum_of_column)/n))/(n-1))
85
+
86
+ if n.odd?
87
+ index = (n + 1)/2
88
+ @median = @column[index - 1] # Subtract 1 because array indices start at 0.
89
+ else
90
+ middle_index = n/2
91
+ right_index = middle_index + 1
92
+ @median = (@column[middle_index - 1] + @column[right_index - 1])/2
93
+ end
94
+
95
+ else
96
+ @median = "N/A"
97
+ @mean = "N/A"
98
+ @range = "N/A"
99
+ @mode = "N/A"
100
+ @standard_dev = "N/A"
101
+ end
102
+
103
+ ##
104
+ # Insert results into spreadsheet cell
105
+
106
+ @sheet[@row_count + 2, 0 ] = "Mean"
107
+ @sheet[@row_count + 2, 0 + c ] = @mean
108
+
109
+ @sheet[@row_count + 3, 0 ] = "Median"
110
+ @sheet[@row_count + 3, 0 + c ] = @median
111
+
112
+ @sheet[@row_count + 4, 0 ] = "Range"
113
+ @sheet[@row_count + 4, 0 + c ] = @range
114
+
115
+ @sheet[@row_count + 5, 0 ] = "Mode"
116
+ @sheet[@row_count + 5, 0 + c ] = @mode
117
+
118
+ @sheet[@row_count + 6, 0 ] = "Standard Deviation"
119
+ @sheet[@row_count + 6, 0 + c ] = @standard_dev
120
+ end
43
121
  end
44
122
 
45
123
  def report_name
@@ -51,14 +129,11 @@ module MovieDB
51
129
 
52
130
  def insert_data_to_existing_xls_file
53
131
  filename = ("#{report_name}.xls")
54
- #@book.worksheet(0).insert_row(4, [@data_processing ])
55
- @sheet[5, 1] = @data_processing
56
- @sheet.row(6).push "Median", @data_processing
57
-
58
132
  @book.write File.join('reports', filename)
59
133
  return filename
60
134
  end
61
135
  end
136
+ module Coefficient_Of_Determination; end
62
137
  module Discrete_Least_Squares_Meshless_Method; end
63
138
  module Explained_Sum_Of_Squares; end
64
139
  module Fraction_Of_Variance_Unexplained; end
@@ -3,6 +3,7 @@ require 'MovieDB/data_analysis'
3
3
  module MovieDB
4
4
  class DataProcess
5
5
  PATH_AOV = MovieDB::DataAnalysis::AnalysisOfVariance::LeastSquares
6
+ extend PATH_AOV::Statistic
6
7
  extend PATH_AOV::Coefficient_Of_Determination
7
8
  include PATH_AOV::Explained_Sum_Of_Squares
8
9
  include PATH_AOV::Fraction_Of_Variance_Unexplained
@@ -1,3 +1,3 @@
1
1
  module MovieDB
2
- VERSION = "0.1.10"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -5,11 +5,10 @@ describe MovieDB::DataProcess do
5
5
  describe "#AnalysisOfVariance" do
6
6
  describe "#LeastSquares" do
7
7
  describe "#Coefficient_Of_Determination" do
8
- let(:cod) {MovieDB::DataProcess}
9
- #let(:cod) {MovieDB::DataProcess.analyze_cod(imdb_raw_data_2013121820.xls)}
8
+ let(:basic_stat) {MovieDB::DataProcess}
10
9
 
11
10
  it "should return the cof" do
12
- cod.send(:coefficient_of_determination, 'imdb_raw_data_2013121911.xls').should == []
11
+ basic_stat.send(:basic_statistic, 'imdb_raw_data_2013121911.xls').should == []
13
12
  end
14
13
 
15
14
  it "raise error if file does not exist" do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: movieDB
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.10
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Albert_McKeever
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-19 00:00:00.000000000 Z
11
+ date: 2013-12-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -164,10 +164,8 @@ files:
164
164
  - movieDB.gemspec
165
165
  - npm-debug.log
166
166
  - reports/.DS_Store
167
- - reports/Coefficient_Of_Determination_2013121918.xls
168
- - reports/add.xls
167
+ - reports/Statistic_2013122017.xls
169
168
  - reports/imdb_raw_data_2013121911.xls
170
- - reports/imdb_raw_data_2013121912.xls
171
169
  - spec/.DS_Store
172
170
  - spec/data_export_spec.rb
173
171
  - spec/data_process_spec.rb
data/reports/add.xls DELETED
Binary file
Binary file