statsample-ekatena 2.0.2

Files changed (156)
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
data/lib/statsample-ekatena/bivariate/pearson.rb
@@ -0,0 +1,54 @@
+ module Statsample
+   module Bivariate
+     # = Pearson correlation coefficient (r)
+     #
+     # The product-moment Pearson correlation coefficient, known as 'r',
+     # is a measure of bivariate association between two continuous
+     # variables.
+     #
+     # == Usage
+     #   a = Daru::Vector.new([1,2,3,4,5,6])
+     #   b = Daru::Vector.new([2,3,4,5,6,7])
+     #   pearson = Statsample::Bivariate::Pearson.new(a,b)
+     #   puts pearson.r
+     #   puts pearson.t
+     #   puts pearson.probability
+     #   puts pearson.summary
+     #
+     class Pearson
+       include Statsample::Test
+       include Summarizable
+       # Name of correlation
+       attr_accessor :name
+       # Tails for probability (:both, :left or :right)
+       attr_accessor :tails
+       # Number of cases used (valid pairs only)
+       attr_accessor :n
+       def initialize(v1, v2, opts=Hash.new)
+         @v1_name, @v2_name = v1.name, v2.name
+         @v1, @v2 = Statsample.only_valid_clone(v1, v2)
+         @n = @v1.size
+         opts_default = {
+           :name  => _("Correlation (%s - %s)") % [@v1_name, @v2_name],
+           :tails => :both
+         }
+         # User-supplied options take precedence over the defaults
+         @opts = opts_default.merge(opts)
+         @opts.each { |k, v|
+           self.send("#{k}=", v) if self.respond_to? k
+         }
+       end
+       def r
+         Statsample::Bivariate.pearson(@v1, @v2)
+       end
+       def t
+         Statsample::Bivariate.t_pearson(@v1, @v2)
+       end
+       def probability
+         p_using_cdf(Distribution::T.cdf(t, @v1.size - 2), tails)
+       end
+       def report_building(builder)
+         builder.text(_("%s : r=%0.3f (t:%0.3f, d.f.=%d, p:%0.3f / %s tails)") % [@name, r, t, (n - 2), probability, tails])
+       end
+     end
+   end
+ end
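A minimal usage sketch of the Pearson class above, assuming the gem is required as 'statsample' as its lib layout suggests; the data values are illustrative, not part of the gem:

    require 'statsample'

    # Two continuous, imperfectly correlated variables
    a = Daru::Vector.new([1, 2, 3, 4, 5, 6])
    b = Daru::Vector.new([2, 4, 3, 6, 5, 8])

    pearson = Statsample::Bivariate::Pearson.new(a, b, :tails => :both)
    puts pearson.r            # correlation coefficient
    puts pearson.t            # t statistic, n-2 degrees of freedom
    puts pearson.probability  # p-value for the configured tails
    puts pearson.summary      # formatted report (via Summarizable)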
data/lib/statsample-ekatena/codification.rb
@@ -0,0 +1,182 @@
+ require 'yaml'
+
+ module Statsample
+   # This module helps to code open-ended questions:
+   # * Select one or more vectors of a dataset to create a YAML file, in which each vector is a hash whose keys and values are the vector's factors. If a value contains Statsample::SPLIT_TOKEN, it is split into two or more hash keys.
+   # * Edit the YAML file and replace the hash values with your codes. If you need to assign two or more codes to an answer, use the separator (default Statsample::SPLIT_TOKEN).
+   # * Recode the vectors, loading the YAML file:
+   #   * recode_dataset_simple!() : the new vectors have the same name as the originals plus "_recoded"
+   #   * recode_dataset_split!() : creates as many vectors as there are values. See Vector.add_vectors_by_split() for arguments
+   #
+   # Usage:
+   #   recode_file = "recodification.yaml"
+   #   phase = :first # flag
+   #   if phase == :first
+   #     File.open(recode_file, "w") { |fp|
+   #       Statsample::Codification.create_yaml(ds, %w{vector1 vector2}, fp, ",")
+   #     }
+   #     # Edit the file recodification.yaml and verify changes
+   #   elsif phase == :second
+   #     dict = YAML.load(File.read(recode_file))
+   #     Statsample::Codification.verify(dict, [:vector1])
+   #     # Add new vectors to the dataset
+   #   elsif phase == :third
+   #     dict = YAML.load(File.read(recode_file))
+   #     Statsample::Codification.recode_dataset_split!(ds, dict, "*")
+   #   end
+   #
+   module Codification
+     class << self
+       # Create a hash, based on vectors, to create the dictionary.
+       # The keys will be the vector names on the dataset and the values
+       # will be hashes, with keys == values, for recodification.
+       def create_hash(dataset, vectors, sep=Statsample::SPLIT_TOKEN)
+         raise ArgumentError, "Array shouldn't be empty" if vectors.size == 0
+         pro_hash = vectors.inject({}) do |h, v_name|
+           v_name = v_name.is_a?(Numeric) ? v_name : v_name.to_sym
+           raise Exception, "Vector #{v_name} doesn't exist on Dataset" if
+             !dataset.vectors.include?(v_name)
+           v = dataset[v_name]
+           split_data = v.splitted(sep)
+                         .flatten
+                         .collect { |c| c.to_s }
+                         .find_all { |c| !c.nil? }
+
+           factors = split_data.uniq
+                               .compact
+                               .sort
+                               .inject({}) { |ac, val| ac[val] = val; ac }
+           h[v_name] = factors
+           h
+         end
+
+         pro_hash
+       end
+       # Create YAML to build a dictionary, based on vectors.
+       # The keys will be the vector names on the dataset and the values
+       # will be hashes, with keys == values, for recodification.
+       #
+       #   v1 = Daru::Vector.new(%w{a,b b,c d})
+       #   ds = Daru::DataFrame.new({:v1 => v1})
+       #   Statsample::Codification.create_yaml(ds, [:v1])
+       #   => "--- \nv1: \n  a: a\n  b: b\n  c: c\n  d: d\n"
+       def create_yaml(dataset, vectors, io=nil, sep=Statsample::SPLIT_TOKEN)
+         pro_hash = create_hash(dataset, vectors, sep)
+         YAML.dump(pro_hash, io)
+       end
+       # Create an Excel file to build a dictionary, based on vectors.
+       # Raises an error if a file named filename already exists.
+       # The columns will be:
+       # * field: name of the vector
+       # * original: original value
+       # * recoded: new code
+       def create_excel(dataset, vectors, filename, sep=Statsample::SPLIT_TOKEN)
+         require 'spreadsheet'
+         if File.exist?(filename)
+           raise "A file named #{filename} already exists. Delete it before overwriting."
+         end
+         book = Spreadsheet::Workbook.new
+         sheet = book.create_worksheet
+         sheet.row(0).concat(%w(field original recoded))
+         i = 1
+         create_hash(dataset, vectors, sep).sort.each do |field, inner_hash|
+           inner_hash.sort.each do |k, v|
+             sheet.row(i).concat([field.to_s, k.to_s, v.to_s])
+             i += 1
+           end
+         end
+
+         book.write(filename)
+       end
+       # From an Excel file, generates a dictionary hash
+       # to use on recode_dataset_simple!() or recode_dataset_split!().
+       def excel_to_recoded_hash(filename)
+         require 'spreadsheet'
+         h = {}
+         book = Spreadsheet.open filename
+         sheet = book.worksheet 0
+         row_i = 0
+         sheet.each do |row|
+           row_i += 1
+           next if row_i == 1 or row[0].nil? or row[1].nil? or row[2].nil?
+           key = row[0].to_sym
+           h[key] ||= {}
+           h[key][row[1]] = row[2]
+         end
+         h
+       end
+
+       def inverse_hash(h, sep=Statsample::SPLIT_TOKEN)
+         h.inject({}) do |a, v|
+           v[1].split(sep).each do |val|
+             a[val] ||= []
+             a[val].push(v[0])
+           end
+           a
+         end
+       end
+
+       def dictionary(h, sep=Statsample::SPLIT_TOKEN)
+         h.inject({}) { |a, v| a[v[0]] = v[1].split(sep); a }
+       end
+
+       def recode_vector(v, h, sep=Statsample::SPLIT_TOKEN)
+         dict = dictionary(h, sep)
+         new_data = v.splitted(sep)
+         new_data.collect do |c|
+           if c.nil?
+             nil
+           else
+             c.collect { |value| dict[value] }.flatten.uniq
+           end
+         end
+       end
+       def recode_dataset_simple!(dataset, dictionary_hash, sep=Statsample::SPLIT_TOKEN)
+         _recode_dataset(dataset, dictionary_hash, sep, false)
+       end
+       def recode_dataset_split!(dataset, dictionary_hash, sep=Statsample::SPLIT_TOKEN)
+         _recode_dataset(dataset, dictionary_hash, sep, true)
+       end
+
+       def _recode_dataset(dataset, h, sep=Statsample::SPLIT_TOKEN, split=false)
+         v_names = h.keys
+         v_names.each do |v_name|
+           raise Exception, "Vector #{v_name} doesn't exist on Dataset" if !dataset.vectors.include? v_name
+           recoded = Daru::Vector.new(
+             recode_vector(dataset[v_name], h[v_name], sep).collect do |c|
+               if c.nil?
+                 nil
+               else
+                 c.join(sep)
+               end
+             end
+           )
+           if split
+             recoded.split_by_separator(sep).each { |k, v|
+               dataset[(v_name.to_s + "_" + k).to_sym] = v
+             }
+           else
+             dataset[(v_name.to_s + "_recoded").to_sym] = recoded
+           end
+         end
+       end
+
+       def verify(h, v_names=nil, sep=Statsample::SPLIT_TOKEN, io=$>)
+         v_names ||= h.keys
+         v_names.each { |v_name|
+           inverse = inverse_hash(h[v_name], sep)
+           io.puts "- Field: #{v_name}"
+           inverse.sort { |a, b| -(a[1].count <=> b[1].count) }.each { |k, v|
+             io.puts "  - \"#{k}\" (#{v.count}) :\n    -'" + v.join("\n    -'") + "'"
+           }
+         }
+       end
+     end
+   end
+ end
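A compact end-to-end sketch of the workflow above, under the assumption that the answers hold comma-separated codes; the data and the "edited" dictionary are made up for illustration:

    require 'statsample'

    # Open-ended answers, several codes per answer, separated by commas
    ds = Daru::DataFrame.new(
      :pets => Daru::Vector.new(["dog,cat", "dog", "bird,cat"])
    )

    # Build the editable dictionary: keys == values until you change them
    dict = Statsample::Codification.create_hash(ds, [:pets], ",")
    # => { :pets => { "bird"=>"bird", "cat"=>"cat", "dog"=>"dog" } }

    # Pretend we edited the YAML/Excel and merged cat and dog into "mammal"
    dict[:pets] = { "dog" => "mammal", "cat" => "mammal", "bird" => "bird" }

    # Apply it: adds a :pets_recoded vector to the dataset
    Statsample::Codification.recode_dataset_simple!(ds, dict, ",")
    ds[:pets_recoded].to_a # => ["mammal", "mammal", "bird,mammal"]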
data/lib/statsample-ekatena/converter/csv.rb
@@ -0,0 +1,28 @@
+ # This module will be removed in the next release.
+ # Please shift to using Daru::DataFrame.from_csv and #write_csv for CSV
+ # related operations.
+ module Statsample
+   class CSV
+     class << self
+       # Returns a DataFrame created from a CSV file.
+       #
+       # == NOTE
+       #
+       # This method has been DEPRECATED in favor of Daru::DataFrame.from_csv.
+       # Please switch to using that.
+       def read(filename, empty = [''], ignore_lines = 0, opts = {})
+         raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_csv instead."
+       end
+
+       # Saves a Dataset to a CSV file.
+       #
+       # == NOTE
+       #
+       # This method has been DEPRECATED in favor of Daru::DataFrame#write_csv.
+       # Please use that instead.
+       def write(dataset, filename, convert_comma = false, opts = {})
+         raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_csv instead."
+       end
+     end
+   end
+ end
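Both stubs now raise, so the Daru calls are the only supported path. A quick sketch of the replacements (the file names are hypothetical):

    require 'daru'

    # Replaces Statsample::CSV.read
    df = Daru::DataFrame.from_csv("survey.csv")

    # Replaces Statsample::CSV.write
    df.write_csv("survey_recoded.csv")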
data/lib/statsample-ekatena/converter/spss.rb
@@ -0,0 +1,48 @@
+ module Statsample
+   module SPSS
+     class << self
+       # Exports an SPSS matrix with tetrachoric correlations.
+       #
+       # Use:
+       #   ds = Daru::DataFrame.from_excel("my_data.xls")
+       #   puts Statsample::SPSS.tetrachoric_correlation_matrix(ds)
+       def tetrachoric_correlation_matrix(ds)
+         dsv = ds.reject_values(*Daru::MISSING_VALUES)
+         # Delete all vectors that don't have variation
+         dsv.vectors.each { |f|
+           if dsv[f].factors.size == 1
+             dsv.delete_vector(f)
+           else
+             dsv[f] = dsv[f].dichotomize
+           end
+         }
+
+         tcm = Statsample::Bivariate.tetrachoric_correlation_matrix(dsv)
+         n = dsv.vectors.to_a.collect { |f|
+           sprintf("%d", dsv[f].size)
+         }
+         meanlist = dsv.vectors.to_a.collect { |f|
+           sprintf("%0.3f", dsv[f].mean)
+         }
+         stddevlist = dsv.vectors.to_a.collect { |f|
+           sprintf("%0.3f", dsv[f].sd)
+         }
+         out = <<-HEREDOC
+ MATRIX DATA VARIABLES=ROWTYPE_ #{dsv.fields.join(",")}.
+ BEGIN DATA
+ N #{n.join(" ")}
+ MEAN #{meanlist.join(" ")}
+ STDDEV #{stddevlist.join(" ")}
+         HEREDOC
+         tcm.row_size.times { |i|
+           out += "CORR "
+           (i + 1).times { |j|
+             out += sprintf("%0.3f", tcm[i, j]) + " "
+           }
+           out += "\n"
+         }
+         out += "END DATA.\nEXECUTE.\n"
+       end
+     end
+   end
+ end
1
+ require 'statsample/converter/spss'
2
+ module Statsample
3
+ # Create and dumps Datasets on a database
4
+ #
5
+ # == NOTE
6
+ #
7
+ # Deprecated. Use Daru::DataFrame.from_sql and Daru::DataFrame#write_sql
8
+ module Database
9
+ class << self
10
+ # Read a database query and returns a Dataset
11
+ #
12
+ # == NOTE
13
+ #
14
+ # Deprecated. Use Daru::DataFrame.from_sql instead.
15
+ def read(dbh,query)
16
+ raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_sql instead."
17
+ end
18
+
19
+ # Insert each case of the Dataset on the selected table
20
+ #
21
+ # == NOTE
22
+ #
23
+ # Deprecated. Use Daru::DataFrame#write_sql instead
24
+ def insert(ds, dbh, table)
25
+ raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_sql instead."
26
+ end
27
+ # Create a sql, basen on a given Dataset
28
+ #
29
+ # == NOTE
30
+ #
31
+ # Deprecated. Use Daru::DataFrame#create_sql instead.
32
+ def create_sql(ds,table,charset="UTF8")
33
+ raise NoMethodError, "Deprecated. Use Daru::DataFrame#create_sql instead."
34
+ end
35
+ end
36
+ end
37
+ module Mondrian
38
+ class << self
39
+ def write(dataset,filename)
40
+ File.open(filename,"wb") do |fp|
41
+ fp.puts dataset.vectors.to_a.join("\t")
42
+ dataset.each_row do |row|
43
+ row2 = row.map { |v| v.nil? ? "NA" : v.to_s.gsub(/\s+/,"_") }
44
+ fp.puts row2.join("\t")
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
50
+
51
+ class PlainText
52
+ class << self
53
+ def read(filename, fields)
54
+ raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_plaintext instead."
55
+ end
56
+ end
57
+ end
58
+
59
+ # This class has been DEPRECATED. Use Daru::DataFrame::from_excel
60
+ # Daru::DataFrame#write_excel for XLS file operations.
61
+ class Excel
62
+ class << self
63
+ # Write a Excel spreadsheet based on a dataset
64
+ # * TODO: Format nicely date values
65
+ #
66
+ # == NOTE
67
+ #
68
+ # Deprecated. Use Daru::DataFrame#write_csv.
69
+ def write(dataset,filename)
70
+ raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_excel instead."
71
+ end
72
+
73
+ # Returns a dataset based on a xls file
74
+ #
75
+ # == NOTE
76
+ #
77
+ # Deprecated. Use Daru::DataFrame.from_excel instead.
78
+ def read(filename, opts=Hash.new)
79
+ raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_excel instead."
80
+ end
81
+ end
82
+ end
83
+
84
+ module Mx
85
+ class << self
86
+ def write(dataset,filename,type=:covariance)
87
+ puts "Writing MX File"
88
+ File.open(filename,"w") do |fp|
89
+ fp.puts "! #{filename}"
90
+ fp.puts "! Output generated by Statsample"
91
+ fp.puts "Data Ninput=#{dataset.fields.size} Nobservations=#{dataset.cases}"
92
+ fp.puts "Labels " + dataset.vectors.to_a.join(" ")
93
+ case type
94
+ when :raw
95
+ fp.puts "Rectangular"
96
+ dataset.each do |row|
97
+ out=dataset.vectors.to_a.collect do |f|
98
+ if dataset[f].is_valid? row[f]
99
+ row[f]
100
+ else
101
+ "."
102
+ end
103
+ end
104
+ fp.puts out.join("\t")
105
+ end
106
+ fp.puts "End Rectangular"
107
+ when :covariance
108
+ fp.puts " CMatrix Full"
109
+ cm=Statsample::Bivariate.covariance_matrix(dataset)
110
+ d=(0...(cm.row_size)).collect {|row|
111
+ (0...(cm.column_size)).collect{|col|
112
+ cm[row,col].nil? ? "." : sprintf("%0.3f", cm[row,col])
113
+ }.join(" ")
114
+ }.join("\n")
115
+ fp.puts d
116
+ end
117
+ end
118
+ end
119
+ end
120
+ end
121
+ module GGobi
122
+ class << self
123
+ def write(dataset,filename,opt={})
124
+ File.open(filename,"w") {|fp|
125
+ fp.write(self.out(dataset,opt))
126
+ }
127
+ end
128
+ def out(dataset,opt={})
129
+ require 'ostruct'
130
+ default_opt = {:dataname => "Default", :description=>"", :missing=>"NA"}
131
+ default_opt.merge! opt
132
+ carrier=OpenStruct.new
133
+ carrier.categorials=[]
134
+ carrier.conversions={}
135
+ variables_def=dataset.vectors.to_a.collect{|k|
136
+ variable_definition(carrier,dataset[k],k)
137
+ }.join("\n")
138
+
139
+ indexes=carrier.categorials.inject({}) {|s,c|
140
+ s[dataset.vectors.to_a.index(c)]=c
141
+ s
142
+ }
143
+ records=""
144
+ dataset.each_row {|c|
145
+ indexes.each { |ik,iv|
146
+ c[ik] = carrier.conversions[iv][c[ik]]
147
+ }
148
+ records << "<record>#{values_definition(c, default_opt[:missing])}</record>\n"
149
+ }
150
+
151
+ out=<<EOC
152
+ <?xml version="1.0"?>
153
+ <!DOCTYPE ggobidata SYSTEM "ggobi.dtd">
154
+ <ggobidata count="1">
155
+ <data name="#{default_opt[:dataname]}">
156
+ <description>#{default_opt[:description]}</description>
157
+ <variables count="#{dataset.fields.size}">
158
+ #{variables_def}
159
+ </variables>
160
+ <records count="#{dataset.cases}" missingValue="#{default_opt[:missing]}">
161
+ #{records}
162
+ </records>
163
+
164
+ </data>
165
+ </ggobidata>
166
+ EOC
167
+
168
+ out
169
+
170
+ end
171
+ def values_definition(c,missing)
172
+ c.collect{|v|
173
+ if v.nil?
174
+ "#{missing}"
175
+ elsif v.is_a? Numeric
176
+ "#{v}"
177
+ else
178
+ "#{v.gsub(/\s+/,"_")}"
179
+ end
180
+ }.join(" ")
181
+ end
182
+ # Outputs a string for a variable definition
183
+ # v = vector
184
+ # name = name of the variable
185
+ # nickname = nickname
186
+ def variable_definition(carrier,v,name,nickname=nil)
187
+ nickname = (nickname.nil? ? "" : "nickname=\"#{nickname}\"" )
188
+ if v.type==:object or v.to_a.find {|d| d.is_a? String }
189
+ carrier.categorials.push(name)
190
+ carrier.conversions[name]={}
191
+ factors=v.factors
192
+ out ="<categoricalvariable name=\"#{name}\" #{nickname}>\n"
193
+ out << "<levels count=\"#{factors.size}\">\n"
194
+ out << (1..factors.size).to_a.collect{|i|
195
+ carrier.conversions[name][factors[i-1]]=i
196
+ "<level value=\"#{i}\">#{(v.labels[factors[i-1]] || factors[i-1])}</level>"
197
+ }.join("\n")
198
+ out << "</levels>\n</categoricalvariable>\n"
199
+ out
200
+ elsif v.to_a.find {|d| d.is_a? Float}
201
+ "<realvariable name=\"#{name}\" #{nickname} />"
202
+ else
203
+ "<integervariable name=\"#{name}\" #{nickname} />"
204
+ end
205
+ end
206
+ end
207
+ end
208
+ end
209
+
210
+ require 'statsample/converter/csv.rb'
211
+
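Of the converters above, Mondrian, Mx and GGobi are the ones that still do real work. A sketch of driving each of them, with hypothetical file names; note that the Mx covariance mode needs all-numeric vectors:

    require 'statsample'

    # Mixed dataset for the Mondrian and GGobi writers
    ds = Daru::DataFrame.new(
      :height => Daru::Vector.new([170, 165, 180, 175]),
      :group  => Daru::Vector.new(%w[a b a b])
    )
    Statsample::Mondrian.write(ds, "dataset.txt") # tab-separated; nil => "NA"
    Statsample::GGobi.write(ds, "dataset.xml")    # GGobi XML; categoricals level-coded

    # Mx covariance input from numeric vectors only
    numeric = Daru::DataFrame.new(
      :height => Daru::Vector.new([170, 165, 180, 175]),
      :weight => Daru::Vector.new([70, 62, 84, 76])
    )
    Statsample::Mx.write(numeric, "dataset.mx", :covariance)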