movieDB 0.1.9 → 0.1.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +4 -4
- data/lib/movieDB.rb +1 -1
- data/lib/movieDB/base.rb +3 -3
- data/lib/movieDB/data_analysis.rb +50 -6
- data/lib/movieDB/data_export.rb +13 -26
- data/lib/movieDB/data_process.rb +24 -0
- data/lib/movieDB/version.rb +1 -1
- data/reports/Coefficient_Of_Determination_2013121918.xls +0 -0
- data/reports/add.xls +0 -0
- data/reports/imdb_raw_data_2013121911.xls +0 -0
- data/reports/imdb_raw_data_2013121912.xls +0 -0
- data/spec/{data_analysis_spec.rb → data_process_spec.rb} +18 -13
- data/spec/spec_helper.rb +1 -1
- metadata +9 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d50cca9ba4a8eab49599c8fd2dbe0ab856993d34
|
4
|
+
data.tar.gz: 0f8f434ce0c78e6b4604cc326717b75373777818
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: aba42600c6385faa6383be3e23eae5fa604349de78c2d607d38d6ac34ceaf91239d14db5bb652f14c16c879973acba335617ef9e3e9537bb8e6b46af9e0ee713
|
7
|
+
data.tar.gz: 7b2f064cd6dbc380a1f798923b6ba6428af2b4d161a3421c92eaab1d348a636d5419c17a75577a350fe075133f004f98bc7c456567bc22968c14f4ebdb3c53d6
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
## Description
|
4
4
|
|
5
|
-
Although the name suggests a datastore gem, MovieDB is actually a ruby wrapper that inspects, cleans, transform and model imdb data and provides useful data analysis information, suggesting conclusion. The objective is provide a tool that
|
5
|
+
Although the name suggests a datastore gem, MovieDB is actually a Ruby wrapper that inspects, cleans, transforms, and models IMDb data and provides useful data analysis information that suggests conclusions. Its objective is to provide a tool that can help movie/film producers make statistical decisions based on archival IMDb data.
|
6
6
|
|
7
7
|
Basic functions and Data Analysis:
|
8
8
|
* Data Analysis
|
@@ -35,7 +35,7 @@ Or install it yourself as:
|
|
35
35
|
|
36
36
|
> require 'MovieDB/data_export'
|
37
37
|
|
38
|
-
> MovieDB::Movie.clear_data_store
|
38
|
+
> MovieDB::Movie.clear_data_store **ONLY IF YOU WANT TO EMPTY YOUR DATASTORE (ARRAY)
|
39
39
|
|
40
40
|
> MovieDB::Movie.send(:get_multiple_imdb_movie_data, "2024544", "1800241", "0791314")
|
41
41
|
|
@@ -43,9 +43,9 @@ Or install it yourself as:
|
|
43
43
|
|
44
44
|
## Exported Document
|
45
45
|
|
46
|
-
The
|
46
|
+
The exported movie data is stored in your reports directory.
|
47
47
|
|
48
|
-
$ cd /reports/
|
48
|
+
$ cd /reports/imdb_raw_data_20131216.xls
|
49
49
|
|
50
50
|
## Usage - Data Analysis
|
51
51
|
|
data/lib/movieDB.rb
CHANGED
@@ -179,7 +179,7 @@ unless defined? MovieDB::Movie
|
|
179
179
|
movie_info.poster = Array.new << @movie_data.poster
|
180
180
|
movie_info.rating = Array.new << @movie_data.rating
|
181
181
|
movie_info.votes = Array.new << @movie_data.votes
|
182
|
-
movie_info.mpaa_rating = Array.new << @movie_data.mpaa_rating == [nil] ? ["
|
182
|
+
movie_info.mpaa_rating = Array.new << @movie_data.mpaa_rating == [nil] ? ["Not Rated"] : [@movie_data.mpaa_rating]
|
183
183
|
movie_info.tagline = Array.new << @movie_data.tagline
|
184
184
|
movie_info.year = Array.new << @movie_data.year
|
185
185
|
movie_info.release_date = Array.new << @movie_data.release_date
|
data/lib/movieDB/base.rb
CHANGED
@@ -3,9 +3,9 @@ require 'MovieDB/status_checker'
|
|
3
3
|
require 'MovieDB/movie_error'
|
4
4
|
|
5
5
|
module MovieDB #:nodoc
|
6
|
-
# MoviesDB v0.1.x is not a datastore gem. Rather is
|
7
|
-
#
|
8
|
-
#
|
6
|
+
# MovieDB v0.1.x is not a datastore gem. Rather, it is high-level statistical software that performs
|
7
|
+
# mathematical computations for analyzing film data from imdb.
|
8
|
+
# In a nutshell, it is a solution to the common problem of deducing logical hypotheses based on data sets.
|
9
9
|
|
10
10
|
class Base
|
11
11
|
include StatusChecker
|
@@ -9,11 +9,55 @@ module MovieDB
|
|
9
9
|
module AnalysisOfVariance
|
10
10
|
module LeastSquares
|
11
11
|
module Coefficient_Of_Determination
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
12
|
+
def coefficient_of_determination (directory_name)
|
13
|
+
open_spreadsheet(directory_name)
|
14
|
+
perform_computation
|
15
|
+
insert_data_to_existing_xls_file
|
16
|
+
end
|
17
|
+
|
18
|
+
def open_spreadsheet(directory_name)
|
19
|
+
@book = Spreadsheet.open File.join('reports', directory_name)
|
20
|
+
@sheet = @book.worksheet(0)
|
21
|
+
end
|
22
|
+
|
23
|
+
def perform_computation
|
24
|
+
@col_0 = []
|
25
|
+
|
26
|
+
@sheet.each_with_index do |row, i|
|
27
|
+
@col_0 << @sheet[i, 1]
|
28
|
+
end
|
29
|
+
|
30
|
+
##
|
31
|
+
# Perform computation on the data collected
|
32
|
+
# TODO: Need to use the coefficient of determination statistical formula
|
33
|
+
# Calculate median as an example but COD formula must be used
|
34
|
+
|
35
|
+
drop_header = @col_0.shift
|
36
|
+
|
37
|
+
row_count = @sheet.rows.count
|
38
|
+
column_count = @sheet.columns[0].count
|
39
|
+
|
40
|
+
@data_processing = @col_0.inject do |sum, n|
|
41
|
+
(sum + n)/(row_count-1)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def report_name
|
46
|
+
module_nesting = Module.nesting[0].to_s.gsub('::', ' ').split()
|
47
|
+
count = module_nesting.size
|
48
|
+
@data_analysis_name = module_nesting[count - 1]
|
49
|
+
@data_analysis_name << '_' << "#{Time.now.to_s.gsub(':', '').gsub('-', '').gsub(' ', '').split('')[0..9].join}"
|
50
|
+
end
|
51
|
+
|
52
|
+
def insert_data_to_existing_xls_file
|
53
|
+
filename = ("#{report_name}.xls")
|
54
|
+
#@book.worksheet(0).insert_row(4, [@data_processing ])
|
55
|
+
@sheet[5, 1] = @data_processing
|
56
|
+
@sheet.row(6).push "Median", @data_processing
|
57
|
+
|
58
|
+
@book.write File.join('reports', filename)
|
59
|
+
return filename
|
60
|
+
end
|
17
61
|
end
|
18
62
|
module Discrete_Least_Squares_Meshless_Method; end
|
19
63
|
module Explained_Sum_Of_Squares; end
|
@@ -23,7 +67,7 @@ module MovieDB
|
|
23
67
|
module Lack_Of_Fit_Sum_Of_Squares; end
|
24
68
|
module Least_Squares_Support_Vector_Machine; end
|
25
69
|
module Mean_Squared_Error; end
|
26
|
-
module
|
70
|
+
module Moving_Least_Sqares; end
|
27
71
|
module Non_Linear_Iterative_Partial_Least_Squares; end
|
28
72
|
module Non_Linear_Least_Squares; end
|
29
73
|
module Ordinary_Least_Squares; end
|
data/lib/movieDB/data_export.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
require "spreadsheet"
|
2
|
-
require "MovieDB/data_analysis"
|
3
2
|
require "MovieDB"
|
4
3
|
|
5
4
|
# This module will write xls document to file
|
@@ -8,25 +7,6 @@ require "MovieDB"
|
|
8
7
|
|
9
8
|
module MovieDB
|
10
9
|
class DataExport < MovieDB::Movie
|
11
|
-
PATH_AOV = MovieDB::DataAnalysis::AnalysisOfVariance::LeastSquares
|
12
|
-
include PATH_AOV::Coefficient_Of_Determination
|
13
|
-
include PATH_AOV::Explained_Sum_Of_Squares
|
14
|
-
include PATH_AOV::Fraction_Of_Variance_Unexplained
|
15
|
-
include PATH_AOV::Gauss_Newton_Algorithm
|
16
|
-
include PATH_AOV::Iteratively_Reweighted_Least_Squares
|
17
|
-
include PATH_AOV::Lack_Of_Fit_Sum_Of_Squares
|
18
|
-
include PATH_AOV::Least_Squares_Support_Vector_Machine
|
19
|
-
include PATH_AOV::Mean_Squared_Error
|
20
|
-
include PATH_AOV::Moving_Least_Squares
|
21
|
-
include PATH_AOV::Non_Linear_Iterative_Partial_Least_Squares
|
22
|
-
include PATH_AOV::Non_Linear_Least_Squares
|
23
|
-
include PATH_AOV::Ordinary_Least_Squares
|
24
|
-
include PATH_AOV::Partial_Least_Squares_Regression
|
25
|
-
include PATH_AOV::Partition_Of_Sums_Of_Squares
|
26
|
-
include PATH_AOV::Residual_Sum_Of_Squares
|
27
|
-
include PATH_AOV::Total_Least_Squares
|
28
|
-
include PATH_AOV::Total_Sum_Of_Squares
|
29
|
-
|
30
10
|
class << self
|
31
11
|
#TODO: Check the data analysis(DA) name. Write a define_method and include the DA.
|
32
12
|
|
@@ -57,10 +37,15 @@ module MovieDB
|
|
57
37
|
def create_spreadsheet_header
|
58
38
|
@sheet.row(0).concat $IMDB_ATTRIBUTES_HEADERS
|
59
39
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
40
|
+
title_format = Spreadsheet::Format.new :color => :blue,
|
41
|
+
:weight => :bold,
|
42
|
+
:size => 13
|
43
|
+
|
44
|
+
float_format = Spreadsheet::Format.new :number_format => "0.00"
|
45
|
+
|
46
|
+
@sheet.row(0).default_format = title_format
|
47
|
+
@sheet.column(1).default_format = float_format
|
48
|
+
@sheet.column(16).default_format = float_format
|
64
49
|
end
|
65
50
|
|
66
51
|
# Loop through an array of arrays of imdb data. Each row has the
|
@@ -90,8 +75,10 @@ module MovieDB
|
|
90
75
|
when 'rating' then spreadsheet_body_numeric_data("rating")
|
91
76
|
when 'votes' then spreadsheet_body_numeric_data("votes")
|
92
77
|
when 'mpaa_rating' then spreadsheet_body_numeric_data("mpaa_rating")
|
78
|
+
when 'tagline' then spreadsheet_body_text_data("tagline")
|
93
79
|
when 'year' then spreadsheet_body_numeric_data("year")
|
94
80
|
when 'release_date' then spreadsheet_body_numeric_data("release_date")
|
81
|
+
else
|
95
82
|
end
|
96
83
|
end
|
97
84
|
end
|
@@ -99,7 +86,7 @@ module MovieDB
|
|
99
86
|
def spreadsheet_body_text_data(header_title)
|
100
87
|
@e_t = element_title = MovieDB::Movie.instance_eval{filter_movie_attr(header_title)}.flatten
|
101
88
|
|
102
|
-
element_title.each_with_index do |element2,i|
|
89
|
+
element_title.each_with_index do |element2, i|
|
103
90
|
element_array = element_title[(i)].split(' ',)
|
104
91
|
@sheet.row(1 + i).concat element_array
|
105
92
|
end
|
@@ -119,7 +106,7 @@ module MovieDB
|
|
119
106
|
def spreadsheet_body_numeric_data(header_title)
|
120
107
|
@e_t = element_title = MovieDB::Movie.instance_eval{filter_movie_attr(header_title)}
|
121
108
|
|
122
|
-
element_title.each_with_index do |element2,i|
|
109
|
+
element_title.each_with_index do |element2, i|
|
123
110
|
element_array = element_title[(i)]
|
124
111
|
@sheet.row(1 + i).concat element_array
|
125
112
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'MovieDB/data_analysis'
|
2
|
+
|
3
|
+
module MovieDB
|
4
|
+
class DataProcess
|
5
|
+
PATH_AOV = MovieDB::DataAnalysis::AnalysisOfVariance::LeastSquares
|
6
|
+
extend PATH_AOV::Coefficient_Of_Determination
|
7
|
+
include PATH_AOV::Explained_Sum_Of_Squares
|
8
|
+
include PATH_AOV::Fraction_Of_Variance_Unexplained
|
9
|
+
include PATH_AOV::Gauss_Newton_Algorithm
|
10
|
+
include PATH_AOV::Iteratively_Reweighted_Least_Squares
|
11
|
+
include PATH_AOV::Lack_Of_Fit_Sum_Of_Squares
|
12
|
+
include PATH_AOV::Least_Squares_Support_Vector_Machine
|
13
|
+
include PATH_AOV::Mean_Squared_Error
|
14
|
+
include PATH_AOV::Non_Linear_Iterative_Partial_Least_Squares
|
15
|
+
include PATH_AOV::Non_Linear_Least_Squares
|
16
|
+
include PATH_AOV::Ordinary_Least_Squares
|
17
|
+
include PATH_AOV::Partial_Least_Squares_Regression
|
18
|
+
include PATH_AOV::Partition_Of_Sums_Of_Squares
|
19
|
+
include PATH_AOV::Residual_Sum_Of_Squares
|
20
|
+
include PATH_AOV::Total_Least_Squares
|
21
|
+
include PATH_AOV::Total_Sum_Of_Squares
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
data/lib/movieDB/version.rb
CHANGED
Binary file
|
data/reports/add.xls
ADDED
Binary file
|
Binary file
|
Binary file
|
@@ -1,45 +1,50 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe MovieDB::
|
3
|
+
describe MovieDB::DataProcess do
|
4
4
|
|
5
5
|
describe "#AnalysisOfVariance" do
|
6
6
|
describe "#LeastSquares" do
|
7
7
|
describe "#Coefficient_Of_Determination" do
|
8
|
-
let(:
|
9
|
-
|
8
|
+
let(:cod) {MovieDB::DataProcess}
|
9
|
+
#let(:cod) {MovieDB::DataProcess.analyze_cod(imdb_raw_data_2013121820.xls)}
|
10
|
+
|
10
11
|
it "should return the cof" do
|
11
|
-
|
12
|
+
cod.send(:coefficient_of_determination, 'imdb_raw_data_2013121911.xls').should == []
|
12
13
|
end
|
14
|
+
|
15
|
+
it "raise error if file does not exist" do
|
16
|
+
|
17
|
+
end
|
13
18
|
end
|
14
|
-
|
19
|
+
|
15
20
|
describe "#Discrete_Least_Squares_Meshless_Method" do
|
16
21
|
pending
|
17
22
|
end
|
18
|
-
|
23
|
+
|
19
24
|
describe "#Explained_Sum_Of_Squares" do
|
20
25
|
pending
|
21
|
-
end
|
22
|
-
|
26
|
+
end
|
27
|
+
|
23
28
|
describe "#Fraction_Of_Variance_Unexplained" do
|
24
29
|
pending
|
25
30
|
end
|
26
|
-
|
31
|
+
|
27
32
|
describe "#Gauss_Newton_Algorithm" do
|
28
33
|
pending
|
29
34
|
end
|
30
|
-
|
35
|
+
|
31
36
|
describe "#Iteratively_Reweighted_Least_Squares" do
|
32
37
|
pending
|
33
38
|
end
|
34
|
-
|
39
|
+
|
35
40
|
describe "#Lack_Of_Fit_Sum_Of_Squares" do
|
36
41
|
pending
|
37
42
|
end
|
38
|
-
|
43
|
+
|
39
44
|
describe "#Least_Squares_Support_Vector_Machine" do
|
40
45
|
pending
|
41
46
|
end
|
42
|
-
|
47
|
+
|
43
48
|
describe "#Mean_Squared_Error" do
|
44
49
|
pending
|
45
50
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: movieDB
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Albert_McKeever
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -154,6 +154,7 @@ files:
|
|
154
154
|
- lib/movieDB/base.rb
|
155
155
|
- lib/movieDB/data_analysis.rb
|
156
156
|
- lib/movieDB/data_export.rb
|
157
|
+
- lib/movieDB/data_process.rb
|
157
158
|
- lib/movieDB/genre_parser.rb
|
158
159
|
- lib/movieDB/genres/en.txt
|
159
160
|
- lib/movieDB/movie_error.rb
|
@@ -163,9 +164,13 @@ files:
|
|
163
164
|
- movieDB.gemspec
|
164
165
|
- npm-debug.log
|
165
166
|
- reports/.DS_Store
|
167
|
+
- reports/Coefficient_Of_Determination_2013121918.xls
|
168
|
+
- reports/add.xls
|
169
|
+
- reports/imdb_raw_data_2013121911.xls
|
170
|
+
- reports/imdb_raw_data_2013121912.xls
|
166
171
|
- spec/.DS_Store
|
167
|
-
- spec/data_analysis_spec.rb
|
168
172
|
- spec/data_export_spec.rb
|
173
|
+
- spec/data_process_spec.rb
|
169
174
|
- spec/movieDB_spec.rb
|
170
175
|
- spec/person_spec.rb
|
171
176
|
- spec/spec_helper.rb
|
@@ -195,8 +200,8 @@ specification_version: 4
|
|
195
200
|
summary: Movie/Film Statistic and Data Analysis
|
196
201
|
test_files:
|
197
202
|
- spec/.DS_Store
|
198
|
-
- spec/data_analysis_spec.rb
|
199
203
|
- spec/data_export_spec.rb
|
204
|
+
- spec/data_process_spec.rb
|
200
205
|
- spec/movieDB_spec.rb
|
201
206
|
- spec/person_spec.rb
|
202
207
|
- spec/spec_helper.rb
|