movieDB 0.1.9 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +4 -4
- data/lib/movieDB.rb +1 -1
- data/lib/movieDB/base.rb +3 -3
- data/lib/movieDB/data_analysis.rb +50 -6
- data/lib/movieDB/data_export.rb +13 -26
- data/lib/movieDB/data_process.rb +24 -0
- data/lib/movieDB/version.rb +1 -1
- data/reports/Coefficient_Of_Determination_2013121918.xls +0 -0
- data/reports/add.xls +0 -0
- data/reports/imdb_raw_data_2013121911.xls +0 -0
- data/reports/imdb_raw_data_2013121912.xls +0 -0
- data/spec/{data_analysis_spec.rb → data_process_spec.rb} +18 -13
- data/spec/spec_helper.rb +1 -1
- metadata +9 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d50cca9ba4a8eab49599c8fd2dbe0ab856993d34
|
|
4
|
+
data.tar.gz: 0f8f434ce0c78e6b4604cc326717b75373777818
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: aba42600c6385faa6383be3e23eae5fa604349de78c2d607d38d6ac34ceaf91239d14db5bb652f14c16c879973acba335617ef9e3e9537bb8e6b46af9e0ee713
|
|
7
|
+
data.tar.gz: 7b2f064cd6dbc380a1f798923b6ba6428af2b4d161a3421c92eaab1d348a636d5419c17a75577a350fe075133f004f98bc7c456567bc22968c14f4ebdb3c53d6
|
data/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
## Description
|
|
4
4
|
|
|
5
|
-
Although the name suggests a datastore gem, MovieDB is actually a ruby wrapper that inspects, cleans, transform and model imdb data and provides useful data analysis information, suggesting conclusion. The objective is provide a tool that
|
|
5
|
+
Although the name suggests a datastore gem, MovieDB is actually a Ruby wrapper that inspects, cleans, transforms and models IMDb data and provides useful data analysis information, suggesting conclusions. Its objective is to provide a tool that helps movie/film producers make statistical decisions based on archival IMDb data.
|
|
6
6
|
|
|
7
7
|
Basic functions and Data Analysis:
|
|
8
8
|
* Data Analysis
|
|
@@ -35,7 +35,7 @@ Or install it yourself as:
|
|
|
35
35
|
|
|
36
36
|
> require 'MovieDB/data_export'
|
|
37
37
|
|
|
38
|
-
> MovieDB::Movie.clear_data_store
|
|
38
|
+
> MovieDB::Movie.clear_data_store **ONLY IF YOU WANT TO EMPTY YOUR DATASTORE (ARRAY)**
|
|
39
39
|
|
|
40
40
|
> MovieDB::Movie.send(:get_multiple_imdb_movie_data, "2024544", "1800241", "0791314")
|
|
41
41
|
|
|
@@ -43,9 +43,9 @@ Or install it yourself as:
|
|
|
43
43
|
|
|
44
44
|
## Exported Document
|
|
45
45
|
|
|
46
|
-
The
|
|
46
|
+
The exported movie data is stored in your reports directory.
|
|
47
47
|
|
|
48
|
-
$ cd /reports/
|
|
48
|
+
$ cd /reports/imdb_raw_data_20131216.xls
|
|
49
49
|
|
|
50
50
|
## Usage - Data Analysis
|
|
51
51
|
|
data/lib/movieDB.rb
CHANGED
|
@@ -179,7 +179,7 @@ unless defined? MovieDB::Movie
|
|
|
179
179
|
movie_info.poster = Array.new << @movie_data.poster
|
|
180
180
|
movie_info.rating = Array.new << @movie_data.rating
|
|
181
181
|
movie_info.votes = Array.new << @movie_data.votes
|
|
182
|
-
movie_info.mpaa_rating = Array.new << @movie_data.mpaa_rating == [nil] ? ["
|
|
182
|
+
movie_info.mpaa_rating = Array.new << @movie_data.mpaa_rating == [nil] ? ["Not Rated"] : [@movie_data.mpaa_rating]
|
|
183
183
|
movie_info.tagline = Array.new << @movie_data.tagline
|
|
184
184
|
movie_info.year = Array.new << @movie_data.year
|
|
185
185
|
movie_info.release_date = Array.new << @movie_data.release_date
|
data/lib/movieDB/base.rb
CHANGED
|
@@ -3,9 +3,9 @@ require 'MovieDB/status_checker'
|
|
|
3
3
|
require 'MovieDB/movie_error'
|
|
4
4
|
|
|
5
5
|
module MovieDB #:nodoc
|
|
6
|
-
# MoviesDB v0.1.x is not a datastore gem. Rather is
|
|
7
|
-
#
|
|
8
|
-
#
|
|
6
|
+
# MovieDB v0.1.x is not a datastore gem. Rather, it is high-level statistical software that performs
|
|
7
|
+
# mathematical computations for analyzing film data from imdb.
|
|
8
|
+
# In a nutshell, it is a solution to the common problem of deducing logical hypotheses from data sets.
|
|
9
9
|
|
|
10
10
|
class Base
|
|
11
11
|
include StatusChecker
|
|
@@ -9,11 +9,55 @@ module MovieDB
|
|
|
9
9
|
module AnalysisOfVariance
|
|
10
10
|
module LeastSquares
|
|
11
11
|
module Coefficient_Of_Determination
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
12
|
+
def coefficient_of_determination (directory_name)
|
|
13
|
+
open_spreadsheet(directory_name)
|
|
14
|
+
perform_computation
|
|
15
|
+
insert_data_to_existing_xls_file
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def open_spreadsheet(directory_name)
|
|
19
|
+
@book = Spreadsheet.open File.join('reports', directory_name)
|
|
20
|
+
@sheet = @book.worksheet(0)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def perform_computation
|
|
24
|
+
@col_0 = []
|
|
25
|
+
|
|
26
|
+
@sheet.each_with_index do |row, i|
|
|
27
|
+
@col_0 << @sheet[i, 1]
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
##
|
|
31
|
+
# Perform computation on the data collected
|
|
32
|
+
# TODO: Need to use the coefficient of determination statistical formula
|
|
33
|
+
# Calculate median as an example but COD formula must be used
|
|
34
|
+
|
|
35
|
+
drop_header = @col_0.shift
|
|
36
|
+
|
|
37
|
+
row_count = @sheet.rows.count
|
|
38
|
+
column_count = @sheet.columns[0].count
|
|
39
|
+
|
|
40
|
+
@data_processing = @col_0.inject do |sum, n|
|
|
41
|
+
(sum + n)/(row_count-1)
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
def report_name
|
|
46
|
+
module_nesting = Module.nesting[0].to_s.gsub('::', ' ').split()
|
|
47
|
+
count = module_nesting.size
|
|
48
|
+
@data_analysis_name = module_nesting[count - 1]
|
|
49
|
+
@data_analysis_name << '_' << "#{Time.now.to_s.gsub(':', '').gsub('-', '').gsub(' ', '').split('')[0..9].join}"
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def insert_data_to_existing_xls_file
|
|
53
|
+
filename = ("#{report_name}.xls")
|
|
54
|
+
#@book.worksheet(0).insert_row(4, [@data_processing ])
|
|
55
|
+
@sheet[5, 1] = @data_processing
|
|
56
|
+
@sheet.row(6).push "Median", @data_processing
|
|
57
|
+
|
|
58
|
+
@book.write File.join('reports', filename)
|
|
59
|
+
return filename
|
|
60
|
+
end
|
|
17
61
|
end
|
|
18
62
|
module Discrete_Least_Squares_Meshless_Method; end
|
|
19
63
|
module Explained_Sum_Of_Squares; end
|
|
@@ -23,7 +67,7 @@ module MovieDB
|
|
|
23
67
|
module Lack_Of_Fit_Sum_Of_Squares; end
|
|
24
68
|
module Least_Squares_Support_Vector_Machine; end
|
|
25
69
|
module Mean_Squared_Error; end
|
|
26
|
-
module
|
|
70
|
+
module Moving_Least_Sqares; end
|
|
27
71
|
module Non_Linear_Iterative_Partial_Least_Squares; end
|
|
28
72
|
module Non_Linear_Least_Squares; end
|
|
29
73
|
module Ordinary_Least_Squares; end
|
data/lib/movieDB/data_export.rb
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
require "spreadsheet"
|
|
2
|
-
require "MovieDB/data_analysis"
|
|
3
2
|
require "MovieDB"
|
|
4
3
|
|
|
5
4
|
# This module will write xls document to file
|
|
@@ -8,25 +7,6 @@ require "MovieDB"
|
|
|
8
7
|
|
|
9
8
|
module MovieDB
|
|
10
9
|
class DataExport < MovieDB::Movie
|
|
11
|
-
PATH_AOV = MovieDB::DataAnalysis::AnalysisOfVariance::LeastSquares
|
|
12
|
-
include PATH_AOV::Coefficient_Of_Determination
|
|
13
|
-
include PATH_AOV::Explained_Sum_Of_Squares
|
|
14
|
-
include PATH_AOV::Fraction_Of_Variance_Unexplained
|
|
15
|
-
include PATH_AOV::Gauss_Newton_Algorithm
|
|
16
|
-
include PATH_AOV::Iteratively_Reweighted_Least_Squares
|
|
17
|
-
include PATH_AOV::Lack_Of_Fit_Sum_Of_Squares
|
|
18
|
-
include PATH_AOV::Least_Squares_Support_Vector_Machine
|
|
19
|
-
include PATH_AOV::Mean_Squared_Error
|
|
20
|
-
include PATH_AOV::Moving_Least_Squares
|
|
21
|
-
include PATH_AOV::Non_Linear_Iterative_Partial_Least_Squares
|
|
22
|
-
include PATH_AOV::Non_Linear_Least_Squares
|
|
23
|
-
include PATH_AOV::Ordinary_Least_Squares
|
|
24
|
-
include PATH_AOV::Partial_Least_Squares_Regression
|
|
25
|
-
include PATH_AOV::Partition_Of_Sums_Of_Squares
|
|
26
|
-
include PATH_AOV::Residual_Sum_Of_Squares
|
|
27
|
-
include PATH_AOV::Total_Least_Squares
|
|
28
|
-
include PATH_AOV::Total_Sum_Of_Squares
|
|
29
|
-
|
|
30
10
|
class << self
|
|
31
11
|
#TODO: Check the data analysis(DA) name. Write a define_method and include the DA.
|
|
32
12
|
|
|
@@ -57,10 +37,15 @@ module MovieDB
|
|
|
57
37
|
def create_spreadsheet_header
|
|
58
38
|
@sheet.row(0).concat $IMDB_ATTRIBUTES_HEADERS
|
|
59
39
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
40
|
+
title_format = Spreadsheet::Format.new :color => :blue,
|
|
41
|
+
:weight => :bold,
|
|
42
|
+
:size => 13
|
|
43
|
+
|
|
44
|
+
float_format = Spreadsheet::Format.new :number_format => "0.00"
|
|
45
|
+
|
|
46
|
+
@sheet.row(0).default_format = title_format
|
|
47
|
+
@sheet.column(1).default_format = float_format
|
|
48
|
+
@sheet.column(16).default_format = float_format
|
|
64
49
|
end
|
|
65
50
|
|
|
66
51
|
# Loop through the array of arrays of IMDb data. Each row has the
|
|
@@ -90,8 +75,10 @@ module MovieDB
|
|
|
90
75
|
when 'rating' then spreadsheet_body_numeric_data("rating")
|
|
91
76
|
when 'votes' then spreadsheet_body_numeric_data("votes")
|
|
92
77
|
when 'mpaa_rating' then spreadsheet_body_numeric_data("mpaa_rating")
|
|
78
|
+
when 'tagline' then spreadsheet_body_text_data("tagline")
|
|
93
79
|
when 'year' then spreadsheet_body_numeric_data("year")
|
|
94
80
|
when 'release_date' then spreadsheet_body_numeric_data("release_date")
|
|
81
|
+
else
|
|
95
82
|
end
|
|
96
83
|
end
|
|
97
84
|
end
|
|
@@ -99,7 +86,7 @@ module MovieDB
|
|
|
99
86
|
def spreadsheet_body_text_data(header_title)
|
|
100
87
|
@e_t = element_title = MovieDB::Movie.instance_eval{filter_movie_attr(header_title)}.flatten
|
|
101
88
|
|
|
102
|
-
element_title.each_with_index do |element2,i|
|
|
89
|
+
element_title.each_with_index do |element2, i|
|
|
103
90
|
element_array = element_title[(i)].split(' ',)
|
|
104
91
|
@sheet.row(1 + i).concat element_array
|
|
105
92
|
end
|
|
@@ -119,7 +106,7 @@ module MovieDB
|
|
|
119
106
|
def spreadsheet_body_numeric_data(header_title)
|
|
120
107
|
@e_t = element_title = MovieDB::Movie.instance_eval{filter_movie_attr(header_title)}
|
|
121
108
|
|
|
122
|
-
element_title.each_with_index do |element2,i|
|
|
109
|
+
element_title.each_with_index do |element2, i|
|
|
123
110
|
element_array = element_title[(i)]
|
|
124
111
|
@sheet.row(1 + i).concat element_array
|
|
125
112
|
end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
require 'MovieDB/data_analysis'
|
|
2
|
+
|
|
3
|
+
module MovieDB
|
|
4
|
+
class DataProcess
|
|
5
|
+
PATH_AOV = MovieDB::DataAnalysis::AnalysisOfVariance::LeastSquares
|
|
6
|
+
extend PATH_AOV::Coefficient_Of_Determination
|
|
7
|
+
include PATH_AOV::Explained_Sum_Of_Squares
|
|
8
|
+
include PATH_AOV::Fraction_Of_Variance_Unexplained
|
|
9
|
+
include PATH_AOV::Gauss_Newton_Algorithm
|
|
10
|
+
include PATH_AOV::Iteratively_Reweighted_Least_Squares
|
|
11
|
+
include PATH_AOV::Lack_Of_Fit_Sum_Of_Squares
|
|
12
|
+
include PATH_AOV::Least_Squares_Support_Vector_Machine
|
|
13
|
+
include PATH_AOV::Mean_Squared_Error
|
|
14
|
+
include PATH_AOV::Non_Linear_Iterative_Partial_Least_Squares
|
|
15
|
+
include PATH_AOV::Non_Linear_Least_Squares
|
|
16
|
+
include PATH_AOV::Ordinary_Least_Squares
|
|
17
|
+
include PATH_AOV::Partial_Least_Squares_Regression
|
|
18
|
+
include PATH_AOV::Partition_Of_Sums_Of_Squares
|
|
19
|
+
include PATH_AOV::Residual_Sum_Of_Squares
|
|
20
|
+
include PATH_AOV::Total_Least_Squares
|
|
21
|
+
include PATH_AOV::Total_Sum_Of_Squares
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
data/lib/movieDB/version.rb
CHANGED
|
Binary file
|
data/reports/add.xls
ADDED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,45 +1,50 @@
|
|
|
1
1
|
require 'spec_helper'
|
|
2
2
|
|
|
3
|
-
describe MovieDB::
|
|
3
|
+
describe MovieDB::DataProcess do
|
|
4
4
|
|
|
5
5
|
describe "#AnalysisOfVariance" do
|
|
6
6
|
describe "#LeastSquares" do
|
|
7
7
|
describe "#Coefficient_Of_Determination" do
|
|
8
|
-
let(:
|
|
9
|
-
|
|
8
|
+
let(:cod) {MovieDB::DataProcess}
|
|
9
|
+
#let(:cod) {MovieDB::DataProcess.analyze_cod(imdb_raw_data_2013121820.xls)}
|
|
10
|
+
|
|
10
11
|
it "should return the cof" do
|
|
11
|
-
|
|
12
|
+
cod.send(:coefficient_of_determination, 'imdb_raw_data_2013121911.xls').should == []
|
|
12
13
|
end
|
|
14
|
+
|
|
15
|
+
it "raise error if file does not exist" do
|
|
16
|
+
|
|
17
|
+
end
|
|
13
18
|
end
|
|
14
|
-
|
|
19
|
+
|
|
15
20
|
describe "#Discrete_Least_Squares_Meshless_Method" do
|
|
16
21
|
pending
|
|
17
22
|
end
|
|
18
|
-
|
|
23
|
+
|
|
19
24
|
describe "#Explained_Sum_Of_Squares" do
|
|
20
25
|
pending
|
|
21
|
-
end
|
|
22
|
-
|
|
26
|
+
end
|
|
27
|
+
|
|
23
28
|
describe "#Fraction_Of_Variance_Unexplained" do
|
|
24
29
|
pending
|
|
25
30
|
end
|
|
26
|
-
|
|
31
|
+
|
|
27
32
|
describe "#Gauss_Newton_Algorithm" do
|
|
28
33
|
pending
|
|
29
34
|
end
|
|
30
|
-
|
|
35
|
+
|
|
31
36
|
describe "#Iteratively_Reweighted_Least_Squares" do
|
|
32
37
|
pending
|
|
33
38
|
end
|
|
34
|
-
|
|
39
|
+
|
|
35
40
|
describe "#Lack_Of_Fit_Sum_Of_Squares" do
|
|
36
41
|
pending
|
|
37
42
|
end
|
|
38
|
-
|
|
43
|
+
|
|
39
44
|
describe "#Least_Squares_Support_Vector_Machine" do
|
|
40
45
|
pending
|
|
41
46
|
end
|
|
42
|
-
|
|
47
|
+
|
|
43
48
|
describe "#Mean_Squared_Error" do
|
|
44
49
|
pending
|
|
45
50
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: movieDB
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.10
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Albert_McKeever
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2013-12-
|
|
11
|
+
date: 2013-12-19 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|
|
@@ -154,6 +154,7 @@ files:
|
|
|
154
154
|
- lib/movieDB/base.rb
|
|
155
155
|
- lib/movieDB/data_analysis.rb
|
|
156
156
|
- lib/movieDB/data_export.rb
|
|
157
|
+
- lib/movieDB/data_process.rb
|
|
157
158
|
- lib/movieDB/genre_parser.rb
|
|
158
159
|
- lib/movieDB/genres/en.txt
|
|
159
160
|
- lib/movieDB/movie_error.rb
|
|
@@ -163,9 +164,13 @@ files:
|
|
|
163
164
|
- movieDB.gemspec
|
|
164
165
|
- npm-debug.log
|
|
165
166
|
- reports/.DS_Store
|
|
167
|
+
- reports/Coefficient_Of_Determination_2013121918.xls
|
|
168
|
+
- reports/add.xls
|
|
169
|
+
- reports/imdb_raw_data_2013121911.xls
|
|
170
|
+
- reports/imdb_raw_data_2013121912.xls
|
|
166
171
|
- spec/.DS_Store
|
|
167
|
-
- spec/data_analysis_spec.rb
|
|
168
172
|
- spec/data_export_spec.rb
|
|
173
|
+
- spec/data_process_spec.rb
|
|
169
174
|
- spec/movieDB_spec.rb
|
|
170
175
|
- spec/person_spec.rb
|
|
171
176
|
- spec/spec_helper.rb
|
|
@@ -195,8 +200,8 @@ specification_version: 4
|
|
|
195
200
|
summary: Movie/Film Statistic and Data Analysis
|
|
196
201
|
test_files:
|
|
197
202
|
- spec/.DS_Store
|
|
198
|
-
- spec/data_analysis_spec.rb
|
|
199
203
|
- spec/data_export_spec.rb
|
|
204
|
+
- spec/data_process_spec.rb
|
|
200
205
|
- spec/movieDB_spec.rb
|
|
201
206
|
- spec/person_spec.rb
|
|
202
207
|
- spec/spec_helper.rb
|