statsample-ekatena 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.travis.yml +23 -0
- data/CONTRIBUTING.md +17 -0
- data/Gemfile +2 -0
- data/History.txt +457 -0
- data/LICENSE.txt +12 -0
- data/README.md +175 -0
- data/Rakefile +44 -0
- data/benchmarks/correlation_matrix_15_variables.rb +32 -0
- data/benchmarks/correlation_matrix_5_variables.rb +33 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
- data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
- data/benchmarks/correlation_matrix_methods/results.ds +0 -0
- data/benchmarks/factor_map.rb +37 -0
- data/benchmarks/helpers_benchmark.rb +5 -0
- data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
- data/doc_latex/manual/equations.tex +78 -0
- data/examples/boxplot.rb +28 -0
- data/examples/chisquare_test.rb +23 -0
- data/examples/correlation_matrix.rb +32 -0
- data/examples/dataset.rb +30 -0
- data/examples/dominance_analysis.rb +33 -0
- data/examples/dominance_analysis_bootstrap.rb +32 -0
- data/examples/histogram.rb +26 -0
- data/examples/icc.rb +24 -0
- data/examples/levene.rb +29 -0
- data/examples/multiple_regression.rb +20 -0
- data/examples/multivariate_correlation.rb +33 -0
- data/examples/parallel_analysis.rb +40 -0
- data/examples/polychoric.rb +40 -0
- data/examples/principal_axis.rb +26 -0
- data/examples/reliability.rb +31 -0
- data/examples/scatterplot.rb +25 -0
- data/examples/t_test.rb +27 -0
- data/examples/tetrachoric.rb +17 -0
- data/examples/u_test.rb +24 -0
- data/examples/vector.rb +20 -0
- data/examples/velicer_map_test.rb +46 -0
- data/grab_references.rb +29 -0
- data/lib/spss.rb +134 -0
- data/lib/statsample-ekatena/analysis.rb +100 -0
- data/lib/statsample-ekatena/analysis/suite.rb +89 -0
- data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
- data/lib/statsample-ekatena/anova.rb +24 -0
- data/lib/statsample-ekatena/anova/contrast.rb +79 -0
- data/lib/statsample-ekatena/anova/oneway.rb +187 -0
- data/lib/statsample-ekatena/anova/twoway.rb +207 -0
- data/lib/statsample-ekatena/bivariate.rb +406 -0
- data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
- data/lib/statsample-ekatena/codification.rb +182 -0
- data/lib/statsample-ekatena/converter/csv.rb +28 -0
- data/lib/statsample-ekatena/converter/spss.rb +48 -0
- data/lib/statsample-ekatena/converters.rb +211 -0
- data/lib/statsample-ekatena/crosstab.rb +188 -0
- data/lib/statsample-ekatena/daru.rb +115 -0
- data/lib/statsample-ekatena/dataset.rb +10 -0
- data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
- data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
- data/lib/statsample-ekatena/factor.rb +104 -0
- data/lib/statsample-ekatena/factor/map.rb +124 -0
- data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
- data/lib/statsample-ekatena/factor/pca.rb +242 -0
- data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
- data/lib/statsample-ekatena/factor/rotation.rb +198 -0
- data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
- data/lib/statsample-ekatena/formula/formula.rb +306 -0
- data/lib/statsample-ekatena/graph.rb +11 -0
- data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
- data/lib/statsample-ekatena/graph/histogram.rb +198 -0
- data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
- data/lib/statsample-ekatena/histogram.rb +180 -0
- data/lib/statsample-ekatena/matrix.rb +329 -0
- data/lib/statsample-ekatena/multiset.rb +310 -0
- data/lib/statsample-ekatena/regression.rb +65 -0
- data/lib/statsample-ekatena/regression/multiple.rb +89 -0
- data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
- data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
- data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
- data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
- data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
- data/lib/statsample-ekatena/regression/simple.rb +121 -0
- data/lib/statsample-ekatena/reliability.rb +150 -0
- data/lib/statsample-ekatena/reliability/icc.rb +415 -0
- data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
- data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
- data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
- data/lib/statsample-ekatena/resample.rb +15 -0
- data/lib/statsample-ekatena/shorthand.rb +125 -0
- data/lib/statsample-ekatena/srs.rb +169 -0
- data/lib/statsample-ekatena/test.rb +82 -0
- data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
- data/lib/statsample-ekatena/test/chisquare.rb +73 -0
- data/lib/statsample-ekatena/test/f.rb +52 -0
- data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
- data/lib/statsample-ekatena/test/levene.rb +88 -0
- data/lib/statsample-ekatena/test/t.rb +309 -0
- data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
- data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
- data/lib/statsample-ekatena/vector.rb +19 -0
- data/lib/statsample-ekatena/version.rb +3 -0
- data/lib/statsample.rb +282 -0
- data/po/es/statsample.mo +0 -0
- data/po/es/statsample.po +959 -0
- data/po/statsample.pot +947 -0
- data/references.txt +24 -0
- data/statsample-ekatena.gemspec +49 -0
- data/test/fixtures/bank2.dat +200 -0
- data/test/fixtures/correlation_matrix.rb +17 -0
- data/test/fixtures/df.csv +15 -0
- data/test/fixtures/hartman_23.matrix +9 -0
- data/test/fixtures/stock_data.csv +500 -0
- data/test/fixtures/tetmat_matrix.txt +5 -0
- data/test/fixtures/tetmat_test.txt +1001 -0
- data/test/helpers_tests.rb +83 -0
- data/test/test_analysis.rb +176 -0
- data/test/test_anova_contrast.rb +36 -0
- data/test/test_anovaoneway.rb +26 -0
- data/test/test_anovatwoway.rb +37 -0
- data/test/test_anovatwowaywithdataset.rb +47 -0
- data/test/test_anovawithvectors.rb +102 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_bartlettsphericity.rb +25 -0
- data/test/test_bivariate.rb +164 -0
- data/test/test_codification.rb +78 -0
- data/test/test_crosstab.rb +67 -0
- data/test/test_dominance_analysis.rb +39 -0
- data/test/test_factor.rb +228 -0
- data/test/test_factor_map.rb +38 -0
- data/test/test_factor_pa.rb +56 -0
- data/test/test_fit_model.rb +88 -0
- data/test/test_ggobi.rb +35 -0
- data/test/test_gsl.rb +15 -0
- data/test/test_histogram.rb +109 -0
- data/test/test_matrix.rb +48 -0
- data/test/test_multiset.rb +176 -0
- data/test/test_regression.rb +231 -0
- data/test/test_reliability.rb +223 -0
- data/test/test_reliability_icc.rb +198 -0
- data/test/test_reliability_skillscale.rb +57 -0
- data/test/test_resample.rb +24 -0
- data/test/test_srs.rb +9 -0
- data/test/test_statistics.rb +69 -0
- data/test/test_stest.rb +69 -0
- data/test/test_stratified.rb +17 -0
- data/test/test_test_f.rb +33 -0
- data/test/test_test_kolmogorovsmirnov.rb +34 -0
- data/test/test_test_t.rb +62 -0
- data/test/test_umannwhitney.rb +27 -0
- data/test/test_vector.rb +12 -0
- data/test/test_wilcoxonsignedrank.rb +64 -0
- metadata +570 -0
module Statsample
  module Bivariate
    # = Pearson correlation coefficient (r)
    #
    # The product-moment Pearson's correlation coefficient, known as 'r',
    # is a measure of bivariate association between two continuous
    # variables.
    #
    # == Usage
    #  a = Daru::Vector.new([1,2,3,4,5,6])
    #  b = Daru::Vector.new([2,3,4,5,6,7])
    #  pearson = Statsample::Bivariate::Pearson.new(a,b)
    #  puts pearson.r
    #  puts pearson.t
    #  puts pearson.probability
    #  puts pearson.summary
    #
    class Pearson

      include Statsample::Test
      include Summarizable
      # Name of correlation
      attr_accessor :name
      # Tails for probability (:both, :left or :right)
      attr_accessor :tails
      # Number of cases valid on both vectors
      attr_accessor :n

      # Build a Pearson correlation between vectors +v1+ and +v2+.
      # +opts+ may override :name and :tails.
      def initialize(v1, v2, opts=Hash.new)
        @v1_name, @v2_name = v1.name, v2.name
        # Keep only the cases that are valid on both vectors.
        @v1, @v2 = Statsample.only_valid_clone(v1, v2)
        @n = @v1.size
        opts_default = {
          :name  => _("Correlation (%s - %s)") % [@v1_name, @v2_name],
          :tails => :both
        }
        # BUGFIX: caller options must win over the defaults. The previous
        # code was `opts.merge(opts_default)`, which made the defaults
        # clobber any user-supplied :name or :tails.
        @opts = opts_default.merge(opts)
        @opts.each { |k, v| self.send("#{k}=", v) if self.respond_to? k }
      end

      # Value of the Pearson correlation coefficient.
      def r
        Statsample::Bivariate.pearson(@v1, @v2)
      end

      # t statistic for the correlation (n-2 degrees of freedom).
      def t
        Statsample::Bivariate.t_pearson(@v1, @v2)
      end

      # Probability of the t statistic under the configured tails.
      def probability
        p_using_cdf(Distribution::T.cdf(t, @v1.size - 2), tails)
      end

      # Append a one-line textual summary to a ReportBuilder-style builder.
      def report_building(builder)
        builder.text(_("%s : r=%0.3f (t:%0.3f, g.l.=%d, p:%0.3f / %s tails)") % [@name, r, t, (n - 2), probability, tails])
      end
    end
  end
end
|
require 'yaml'

module Statsample
  # This module aids to code open questions
  # * Select one or more vectors of a dataset, to create a yaml file, on which each vector is a hash, whose keys and values are the vector's factors. If data have Statsample::SPLIT_TOKEN on a value, each value will be separated on two or more hash keys.
  # * Edit the yaml and replace the values of hashes with your codes. If you need to create two or more codes for an answer, use the separator (default Statsample::SPLIT_TOKEN)
  # * Recode the vectors, loading the yaml file:
  #   * recode_dataset_simple!() : The new vectors have the same name of the original plus "_recoded"
  #   * recode_dataset_split!() : Create equal number of vectors as values. See Vector.add_vectors_by_split() for arguments
  #
  # Usage:
  #   recode_file="recodification.yaml"
  #   phase=:first # flag
  #   if phase==:first
  #     File.open(recode_file,"w") {|fp|
  #       Statsample::Codification.create_yaml(ds, [:vector1, :vector2], fp)
  #     }
  #   # Edit the file recodification.yaml and verify changes
  #   elsif phase==:second
  #     File.open(recode_file,"r") {|fp|
  #       Statsample::Codification.verify(YAML.load(fp), [:vector1])
  #     }
  #   # Add new vectors to the dataset
  #   elsif phase==:third
  #     File.open(recode_file,"r") {|fp|
  #       Statsample::Codification.recode_dataset_split!(ds, YAML.load(fp), "*")
  #     }
  #   end
  #
  module Codification
    class << self
      # Create a hash, based on vectors, to create the dictionary.
      # The keys will be vectors name on dataset and the values
      # will be hashes, with keys = values, for recodification
      def create_hash(dataset, vectors, sep=Statsample::SPLIT_TOKEN)
        raise ArgumentError, "Array should't be empty" if vectors.size==0
        pro_hash = vectors.inject({}) do |h, v_name|
          v_name = v_name.is_a?(Numeric) ? v_name : v_name.to_sym
          raise Exception, "Vector #{v_name} doesn't exists on Dataset" if
            !dataset.vectors.include?(v_name)
          v = dataset[v_name]
          # NOTE(review): the nil filter is a no-op after to_s (nil becomes "");
          # kept for byte-compatible behavior with the original.
          split_data = v.splitted(sep)
                        .flatten
                        .collect { |c| c.to_s }
                        .find_all { |c| !c.nil? }

          factors = split_data.uniq
                              .compact
                              .sort
                              .inject({}) { |ac, val| ac[val] = val; ac }
          h[v_name] = factors
          h
        end

        pro_hash
      end

      # Create a yaml to create a dictionary, based on vectors
      # The keys will be vectors name on dataset and the values
      # will be hashes, with keys = values, for recodification
      #
      #   v1 = Daru::Vector.new(%w{a,b b,c d})
      #   ds = Daru::DataFrame.new({:v1 => v1})
      #   Statsample::Codification.create_yaml(ds,[:v1])
      #   => "--- \nv1: \n  a: a\n  b: b\n  c: c\n  d: d\n"
      def create_yaml(dataset, vectors, io=nil, sep=Statsample::SPLIT_TOKEN)
        pro_hash = create_hash(dataset, vectors, sep)
        YAML.dump(pro_hash, io)
      end

      # Create an excel file to create a dictionary, based on vectors.
      # Raises an error if filename exists.
      # The rows will be:
      # * field: name of vector
      # * original: original name
      # * recoded: new code
      def create_excel(dataset, vectors, filename, sep=Statsample::SPLIT_TOKEN)
        require 'spreadsheet'
        if File.exist?(filename)
          # BUGFIX: restore the filename interpolation (the literal had been
          # mangled) and fix the typo in this user-facing message.
          raise "A file named #{filename} already exists. Delete it before overwriting."
        end
        book  = Spreadsheet::Workbook.new
        sheet = book.create_worksheet
        sheet.row(0).concat(%w(field original recoded))
        i = 1
        create_hash(dataset, vectors, sep).sort.each do |field, inner_hash|
          inner_hash.sort.each do |k, v|
            sheet.row(i).concat([field.to_s, k.to_s, v.to_s])
            i += 1
          end
        end

        book.write(filename)
      end

      # From an excel file generates a dictionary hash
      # to use on recode_dataset_simple!() or recode_dataset_split!().
      def excel_to_recoded_hash(filename)
        require 'spreadsheet'
        h = {}
        book  = Spreadsheet.open filename
        sheet = book.worksheet 0
        row_i = 0
        sheet.each do |row|
          row_i += 1
          # Skip the header row and any incomplete rows.
          next if row_i == 1 or row[0].nil? or row[1].nil? or row[2].nil?
          key = row[0].to_sym
          h[key] ||= {}
          h[key][row[1]] = row[2]
        end
        h
      end

      # Invert a {original => "code1<sep>code2"} hash into
      # {code => [originals...]}.
      def inverse_hash(h, sep=Statsample::SPLIT_TOKEN)
        h.inject({}) do |a, v|
          v[1].split(sep).each do |val|
            a[val] ||= []
            a[val].push(v[0])
          end
          a
        end
      end

      # Turn a {original => "code1<sep>code2"} hash into
      # {original => [codes...]}.
      def dictionary(h, sep=Statsample::SPLIT_TOKEN)
        h.inject({}) { |a, v| a[v[0]] = v[1].split(sep); a }
      end

      # Recode a single vector using dictionary hash +h+; returns an array of
      # arrays of codes (nil for missing cases).
      def recode_vector(v, h, sep=Statsample::SPLIT_TOKEN)
        dict = dictionary(h, sep)
        new_data = v.splitted(sep)
        new_data.collect do |c|
          if c.nil?
            nil
          else
            c.collect { |value| dict[value] }.flatten.uniq
          end
        end
      end

      # Recode vectors in place, appending "_recoded" to each new vector name.
      def recode_dataset_simple!(dataset, dictionary_hash, sep=Statsample::SPLIT_TOKEN)
        _recode_dataset(dataset, dictionary_hash, sep, false)
      end

      # Recode vectors in place, creating one vector per distinct code.
      def recode_dataset_split!(dataset, dictionary_hash, sep=Statsample::SPLIT_TOKEN)
        _recode_dataset(dataset, dictionary_hash, sep, true)
      end

      # Shared implementation for the two public recode_dataset_* methods.
      def _recode_dataset(dataset, h, sep=Statsample::SPLIT_TOKEN, split=false)
        v_names = h.keys
        v_names.each do |v_name|
          raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.vectors.include? v_name
          recoded = Daru::Vector.new(
            recode_vector(dataset[v_name], h[v_name], sep).collect do |c|
              if c.nil?
                nil
              else
                c.join(sep)
              end
            end
          )
          if split
            recoded.split_by_separator(sep).each { |k, v|
              dataset[(v_name.to_s + "_" + k).to_sym] = v
            }
          else
            dataset[(v_name.to_s + "_recoded").to_sym] = recoded
          end
        end
      end

      # Print, for each field, every code with the originals mapped to it,
      # sorted by descending frequency — useful for eyeballing a codification.
      def verify(h, v_names=nil, sep=Statsample::SPLIT_TOKEN, io=$>)
        require 'pp'
        v_names ||= h.keys
        v_names.each { |v_name|
          inverse = inverse_hash(h[v_name], sep)
          io.puts "- Field: #{v_name}"
          inverse.sort { |a, b| -(a[1].count <=> b[1].count) }.each { |k, v|
            io.puts " - \"#{k}\" (#{v.count}) :\n -'" + v.join("\n -'") + "'"
          }
        }
      end
    end
  end
end
# This module will be removed in the next release.
# Please shift to using Daru::DataFrame.from_csv and #write_csv for CSV
# related operations.
module Statsample
  # Deprecated CSV facade. Both entry points only raise, pointing callers
  # to the Daru replacements.
  class CSV
    # Deprecated reader. Formerly returned a dataset built from a CSV file.
    #
    # == NOTE
    #
    # DEPRECATED in favour of Daru::DataFrame.from_csv; always raises.
    def self.read(filename, empty = [''], ignore_lines = 0, opts = {})
      raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_csv instead."
    end

    # Deprecated writer. Formerly saved a dataset to a CSV file.
    #
    # == NOTE
    #
    # DEPRECATED in favour of Daru::DataFrame#write_csv; always raises.
    def self.write(dataset, filename, convert_comma = false, opts = {})
      raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_csv instead."
    end
  end
end
module Statsample
  module SPSS
    class << self
      # Export an SPSS MATRIX DATA command with tetrachoric correlations.
      #
      # Use:
      #  ds=Daru::DataFrame.from_excel("my_data.xls")
      #  puts Statsample::SPSS.tetrachoric_correlation_matrix(ds)
      def tetrachoric_correlation_matrix(ds)
        valid = ds.reject_values(*Daru::MISSING_VALUES)
        # Drop constant vectors (no variation); dichotomize the rest.
        valid.vectors.each do |f|
          if valid[f].factors.size == 1
            valid.delete_vector(f)
          else
            valid[f] = valid[f].dichotomize
          end
        end

        tcm = Statsample::Bivariate.tetrachoric_correlation_matrix(valid)
        counts = valid.vectors.to_a.collect { |f| sprintf("%d", valid[f].size) }
        means  = valid.vectors.to_a.collect { |f| sprintf("%0.3f", valid[f].mean) }
        sds    = valid.vectors.to_a.collect { |f| sprintf("%0.3f", valid[f].sd) }
        # NOTE(review): `fields` here vs `vectors.to_a` elsewhere — presumably
        # an alias provided by the statsample Daru extension; confirm.
        out = <<-HEREDOC
MATRIX DATA VARIABLES=ROWTYPE_ #{valid.fields.join(",")}.
BEGIN DATA
N #{counts.join(" ")}
MEAN #{means.join(" ")}
STDDEV #{sds.join(" ")}
        HEREDOC
        # Emit the lower triangle of the correlation matrix, one row per line.
        tcm.row_size.times do |i|
          out << "CORR "
          (i + 1).times do |j|
            out << sprintf("%0.3f", tcm[i, j]) << " "
          end
          out << "\n"
        end
        out << "END DATA.\nEXECUTE.\n"
      end
    end
  end
end
require 'statsample/converter/spss'
module Statsample
  # Create and dumps Datasets on a database
  #
  # == NOTE
  #
  # Deprecated. Use Daru::DataFrame.from_sql and Daru::DataFrame#write_sql
  module Database
    class << self
      # Read a database query and returns a Dataset
      #
      # == NOTE
      #
      # Deprecated. Use Daru::DataFrame.from_sql instead.
      def read(dbh, query)
        raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_sql instead."
      end

      # Insert each case of the Dataset on the selected table
      #
      # == NOTE
      #
      # Deprecated. Use Daru::DataFrame#write_sql instead
      def insert(ds, dbh, table)
        raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_sql instead."
      end

      # Create a sql, based on a given Dataset
      #
      # == NOTE
      #
      # Deprecated. Use Daru::DataFrame#create_sql instead.
      def create_sql(ds, table, charset="UTF8")
        raise NoMethodError, "Deprecated. Use Daru::DataFrame#create_sql instead."
      end
    end
  end

  module Mondrian
    class << self
      # Write the dataset as a Mondrian-style TSV: header row of vector
      # names, then one row per case with nil -> "NA" and whitespace in
      # values replaced by underscores.
      def write(dataset, filename)
        File.open(filename, "wb") do |fp|
          fp.puts dataset.vectors.to_a.join("\t")
          dataset.each_row do |row|
            row2 = row.map { |v| v.nil? ? "NA" : v.to_s.gsub(/\s+/, "_") }
            fp.puts row2.join("\t")
          end
        end
      end
    end
  end

  class PlainText
    class << self
      # Deprecated. Always raises; use Daru::DataFrame.from_plaintext.
      def read(filename, fields)
        raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_plaintext instead."
      end
    end
  end

  # This class has been DEPRECATED. Use Daru::DataFrame::from_excel
  # Daru::DataFrame#write_excel for XLS file operations.
  class Excel
    class << self
      # Write a Excel spreadsheet based on a dataset
      # * TODO: Format nicely date values
      #
      # == NOTE
      #
      # Deprecated. Use Daru::DataFrame#write_excel.
      def write(dataset, filename)
        raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_excel instead."
      end

      # Returns a dataset based on a xls file
      #
      # == NOTE
      #
      # Deprecated. Use Daru::DataFrame.from_excel instead.
      def read(filename, opts=Hash.new)
        raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_excel instead."
      end
    end
  end

  module Mx
    class << self
      # Write the dataset as an Mx script: either raw rectangular data
      # (:raw) or a full covariance matrix (:covariance, the default).
      def write(dataset, filename, type=:covariance)
        puts "Writing MX File"
        File.open(filename, "w") do |fp|
          # BUGFIX: restore the filename interpolation (the literal had been
          # mangled into a fixed string).
          fp.puts "! #{filename}"
          fp.puts "! Output generated by Statsample"
          fp.puts "Data Ninput=#{dataset.fields.size} Nobservations=#{dataset.cases}"
          fp.puts "Labels " + dataset.vectors.to_a.join(" ")
          case type
          when :raw
            fp.puts "Rectangular"
            dataset.each do |row|
              # Replace invalid cells with "." as Mx expects.
              out = dataset.vectors.to_a.collect do |f|
                dataset[f].is_valid?(row[f]) ? row[f] : "."
              end
              fp.puts out.join("\t")
            end
            fp.puts "End Rectangular"
          when :covariance
            fp.puts " CMatrix Full"
            cm = Statsample::Bivariate.covariance_matrix(dataset)
            d = (0...(cm.row_size)).collect { |row|
              (0...(cm.column_size)).collect { |col|
                cm[row, col].nil? ? "." : sprintf("%0.3f", cm[row, col])
              }.join(" ")
            }.join("\n")
            fp.puts d
          end
        end
      end
    end
  end

  module GGobi
    class << self
      # Write the GGobi XML representation of +dataset+ to +filename+.
      def write(dataset, filename, opt={})
        File.open(filename, "w") { |fp|
          fp.write(self.out(dataset, opt))
        }
      end

      # Build the GGobi XML document for +dataset+ as a String.
      # opt keys: :dataname, :description, :missing.
      def out(dataset, opt={})
        require 'ostruct'
        default_opt = { :dataname => "Default", :description => "", :missing => "NA" }
        default_opt.merge! opt
        # Carrier accumulates which vectors are categorical and the
        # factor -> level-number conversion tables built as a side effect
        # of variable_definition().
        carrier = OpenStruct.new
        carrier.categorials = []
        carrier.conversions = {}
        variables_def = dataset.vectors.to_a.collect { |k|
          variable_definition(carrier, dataset[k], k)
        }.join("\n")

        indexes = carrier.categorials.inject({}) { |s, c|
          s[dataset.vectors.to_a.index(c)] = c
          s
        }
        records = ""
        dataset.each_row { |c|
          # Map categorical values to their numeric level before emitting.
          indexes.each { |ik, iv|
            c[ik] = carrier.conversions[iv][c[ik]]
          }
          records << "<record>#{values_definition(c, default_opt[:missing])}</record>\n"
        }

        out = <<EOC
<?xml version="1.0"?>
<!DOCTYPE ggobidata SYSTEM "ggobi.dtd">
<ggobidata count="1">
<data name="#{default_opt[:dataname]}">
<description>#{default_opt[:description]}</description>
<variables count="#{dataset.fields.size}">
#{variables_def}
</variables>
<records count="#{dataset.cases}" missingValue="#{default_opt[:missing]}">
#{records}
</records>

</data>
</ggobidata>
EOC

        out
      end

      # Render one record's values: nil -> missing marker, numbers verbatim,
      # strings with whitespace collapsed to underscores.
      def values_definition(c, missing)
        c.collect { |v|
          if v.nil?
            "#{missing}"
          elsif v.is_a? Numeric
            "#{v}"
          else
            "#{v.gsub(/\s+/, "_")}"
          end
        }.join(" ")
      end

      # Outputs a string for a variable definition
      # v = vector
      # name = name of the variable
      # nickname = nickname
      def variable_definition(carrier, v, name, nickname=nil)
        nickname = (nickname.nil? ? "" : "nickname=\"#{nickname}\"")
        if v.type == :object or v.to_a.find { |d| d.is_a? String }
          carrier.categorials.push(name)
          carrier.conversions[name] = {}
          factors = v.factors
          out = "<categoricalvariable name=\"#{name}\" #{nickname}>\n"
          out << "<levels count=\"#{factors.size}\">\n"
          out << (1..factors.size).to_a.collect { |i|
            carrier.conversions[name][factors[i - 1]] = i
            "<level value=\"#{i}\">#{(v.labels[factors[i - 1]] || factors[i - 1])}</level>"
          }.join("\n")
          out << "</levels>\n</categoricalvariable>\n"
          out
        elsif v.to_a.find { |d| d.is_a? Float }
          "<realvariable name=\"#{name}\" #{nickname} />"
        else
          "<integervariable name=\"#{name}\" #{nickname} />"
        end
      end
    end
  end
end

require 'statsample/converter/csv.rb'