statsample-ekatena 2.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.travis.yml +23 -0
  4. data/CONTRIBUTING.md +17 -0
  5. data/Gemfile +2 -0
  6. data/History.txt +457 -0
  7. data/LICENSE.txt +12 -0
  8. data/README.md +175 -0
  9. data/Rakefile +44 -0
  10. data/benchmarks/correlation_matrix_15_variables.rb +32 -0
  11. data/benchmarks/correlation_matrix_5_variables.rb +33 -0
  12. data/benchmarks/correlation_matrix_methods/correlation_matrix.ds +0 -0
  13. data/benchmarks/correlation_matrix_methods/correlation_matrix.html +93 -0
  14. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +71 -0
  15. data/benchmarks/correlation_matrix_methods/correlation_matrix.xls +0 -0
  16. data/benchmarks/correlation_matrix_methods/correlation_matrix_gsl_ruby.ods +0 -0
  17. data/benchmarks/correlation_matrix_methods/correlation_matrix_with_graphics.ods +0 -0
  18. data/benchmarks/correlation_matrix_methods/results.ds +0 -0
  19. data/benchmarks/factor_map.rb +37 -0
  20. data/benchmarks/helpers_benchmark.rb +5 -0
  21. data/data/locale/es/LC_MESSAGES/statsample.mo +0 -0
  22. data/doc_latex/manual/equations.tex +78 -0
  23. data/examples/boxplot.rb +28 -0
  24. data/examples/chisquare_test.rb +23 -0
  25. data/examples/correlation_matrix.rb +32 -0
  26. data/examples/dataset.rb +30 -0
  27. data/examples/dominance_analysis.rb +33 -0
  28. data/examples/dominance_analysis_bootstrap.rb +32 -0
  29. data/examples/histogram.rb +26 -0
  30. data/examples/icc.rb +24 -0
  31. data/examples/levene.rb +29 -0
  32. data/examples/multiple_regression.rb +20 -0
  33. data/examples/multivariate_correlation.rb +33 -0
  34. data/examples/parallel_analysis.rb +40 -0
  35. data/examples/polychoric.rb +40 -0
  36. data/examples/principal_axis.rb +26 -0
  37. data/examples/reliability.rb +31 -0
  38. data/examples/scatterplot.rb +25 -0
  39. data/examples/t_test.rb +27 -0
  40. data/examples/tetrachoric.rb +17 -0
  41. data/examples/u_test.rb +24 -0
  42. data/examples/vector.rb +20 -0
  43. data/examples/velicer_map_test.rb +46 -0
  44. data/grab_references.rb +29 -0
  45. data/lib/spss.rb +134 -0
  46. data/lib/statsample-ekatena/analysis.rb +100 -0
  47. data/lib/statsample-ekatena/analysis/suite.rb +89 -0
  48. data/lib/statsample-ekatena/analysis/suitereportbuilder.rb +44 -0
  49. data/lib/statsample-ekatena/anova.rb +24 -0
  50. data/lib/statsample-ekatena/anova/contrast.rb +79 -0
  51. data/lib/statsample-ekatena/anova/oneway.rb +187 -0
  52. data/lib/statsample-ekatena/anova/twoway.rb +207 -0
  53. data/lib/statsample-ekatena/bivariate.rb +406 -0
  54. data/lib/statsample-ekatena/bivariate/pearson.rb +54 -0
  55. data/lib/statsample-ekatena/codification.rb +182 -0
  56. data/lib/statsample-ekatena/converter/csv.rb +28 -0
  57. data/lib/statsample-ekatena/converter/spss.rb +48 -0
  58. data/lib/statsample-ekatena/converters.rb +211 -0
  59. data/lib/statsample-ekatena/crosstab.rb +188 -0
  60. data/lib/statsample-ekatena/daru.rb +115 -0
  61. data/lib/statsample-ekatena/dataset.rb +10 -0
  62. data/lib/statsample-ekatena/dominanceanalysis.rb +425 -0
  63. data/lib/statsample-ekatena/dominanceanalysis/bootstrap.rb +232 -0
  64. data/lib/statsample-ekatena/factor.rb +104 -0
  65. data/lib/statsample-ekatena/factor/map.rb +124 -0
  66. data/lib/statsample-ekatena/factor/parallelanalysis.rb +166 -0
  67. data/lib/statsample-ekatena/factor/pca.rb +242 -0
  68. data/lib/statsample-ekatena/factor/principalaxis.rb +243 -0
  69. data/lib/statsample-ekatena/factor/rotation.rb +198 -0
  70. data/lib/statsample-ekatena/formula/fit_model.rb +46 -0
  71. data/lib/statsample-ekatena/formula/formula.rb +306 -0
  72. data/lib/statsample-ekatena/graph.rb +11 -0
  73. data/lib/statsample-ekatena/graph/boxplot.rb +236 -0
  74. data/lib/statsample-ekatena/graph/histogram.rb +198 -0
  75. data/lib/statsample-ekatena/graph/scatterplot.rb +213 -0
  76. data/lib/statsample-ekatena/histogram.rb +180 -0
  77. data/lib/statsample-ekatena/matrix.rb +329 -0
  78. data/lib/statsample-ekatena/multiset.rb +310 -0
  79. data/lib/statsample-ekatena/regression.rb +65 -0
  80. data/lib/statsample-ekatena/regression/multiple.rb +89 -0
  81. data/lib/statsample-ekatena/regression/multiple/alglibengine.rb +128 -0
  82. data/lib/statsample-ekatena/regression/multiple/baseengine.rb +251 -0
  83. data/lib/statsample-ekatena/regression/multiple/gslengine.rb +129 -0
  84. data/lib/statsample-ekatena/regression/multiple/matrixengine.rb +205 -0
  85. data/lib/statsample-ekatena/regression/multiple/rubyengine.rb +86 -0
  86. data/lib/statsample-ekatena/regression/simple.rb +121 -0
  87. data/lib/statsample-ekatena/reliability.rb +150 -0
  88. data/lib/statsample-ekatena/reliability/icc.rb +415 -0
  89. data/lib/statsample-ekatena/reliability/multiscaleanalysis.rb +181 -0
  90. data/lib/statsample-ekatena/reliability/scaleanalysis.rb +233 -0
  91. data/lib/statsample-ekatena/reliability/skillscaleanalysis.rb +114 -0
  92. data/lib/statsample-ekatena/resample.rb +15 -0
  93. data/lib/statsample-ekatena/shorthand.rb +125 -0
  94. data/lib/statsample-ekatena/srs.rb +169 -0
  95. data/lib/statsample-ekatena/test.rb +82 -0
  96. data/lib/statsample-ekatena/test/bartlettsphericity.rb +45 -0
  97. data/lib/statsample-ekatena/test/chisquare.rb +73 -0
  98. data/lib/statsample-ekatena/test/f.rb +52 -0
  99. data/lib/statsample-ekatena/test/kolmogorovsmirnov.rb +63 -0
  100. data/lib/statsample-ekatena/test/levene.rb +88 -0
  101. data/lib/statsample-ekatena/test/t.rb +309 -0
  102. data/lib/statsample-ekatena/test/umannwhitney.rb +208 -0
  103. data/lib/statsample-ekatena/test/wilcoxonsignedrank.rb +90 -0
  104. data/lib/statsample-ekatena/vector.rb +19 -0
  105. data/lib/statsample-ekatena/version.rb +3 -0
  106. data/lib/statsample.rb +282 -0
  107. data/po/es/statsample.mo +0 -0
  108. data/po/es/statsample.po +959 -0
  109. data/po/statsample.pot +947 -0
  110. data/references.txt +24 -0
  111. data/statsample-ekatena.gemspec +49 -0
  112. data/test/fixtures/bank2.dat +200 -0
  113. data/test/fixtures/correlation_matrix.rb +17 -0
  114. data/test/fixtures/df.csv +15 -0
  115. data/test/fixtures/hartman_23.matrix +9 -0
  116. data/test/fixtures/stock_data.csv +500 -0
  117. data/test/fixtures/tetmat_matrix.txt +5 -0
  118. data/test/fixtures/tetmat_test.txt +1001 -0
  119. data/test/helpers_tests.rb +83 -0
  120. data/test/test_analysis.rb +176 -0
  121. data/test/test_anova_contrast.rb +36 -0
  122. data/test/test_anovaoneway.rb +26 -0
  123. data/test/test_anovatwoway.rb +37 -0
  124. data/test/test_anovatwowaywithdataset.rb +47 -0
  125. data/test/test_anovawithvectors.rb +102 -0
  126. data/test/test_awesome_print_bug.rb +16 -0
  127. data/test/test_bartlettsphericity.rb +25 -0
  128. data/test/test_bivariate.rb +164 -0
  129. data/test/test_codification.rb +78 -0
  130. data/test/test_crosstab.rb +67 -0
  131. data/test/test_dominance_analysis.rb +39 -0
  132. data/test/test_factor.rb +228 -0
  133. data/test/test_factor_map.rb +38 -0
  134. data/test/test_factor_pa.rb +56 -0
  135. data/test/test_fit_model.rb +88 -0
  136. data/test/test_ggobi.rb +35 -0
  137. data/test/test_gsl.rb +15 -0
  138. data/test/test_histogram.rb +109 -0
  139. data/test/test_matrix.rb +48 -0
  140. data/test/test_multiset.rb +176 -0
  141. data/test/test_regression.rb +231 -0
  142. data/test/test_reliability.rb +223 -0
  143. data/test/test_reliability_icc.rb +198 -0
  144. data/test/test_reliability_skillscale.rb +57 -0
  145. data/test/test_resample.rb +24 -0
  146. data/test/test_srs.rb +9 -0
  147. data/test/test_statistics.rb +69 -0
  148. data/test/test_stest.rb +69 -0
  149. data/test/test_stratified.rb +17 -0
  150. data/test/test_test_f.rb +33 -0
  151. data/test/test_test_kolmogorovsmirnov.rb +34 -0
  152. data/test/test_test_t.rb +62 -0
  153. data/test/test_umannwhitney.rb +27 -0
  154. data/test/test_vector.rb +12 -0
  155. data/test/test_wilcoxonsignedrank.rb +64 -0
  156. metadata +570 -0
@@ -0,0 +1,54 @@
1
module Statsample
  module Bivariate
    # = Pearson correlation coefficient (r)
    #
    # The moment-product Pearson's correlation coefficient, known as 'r'
    # is a measure of bivariate association between two continuous
    # variables.
    #
    # == Usage
    #  a = Daru::Vector.new([1,2,3,4,5,6])
    #  b = Daru::Vector.new([2,3,4,5,6,7])
    #  pearson = Statsample::Bivariate::Pearson.new(a,b)
    #  puts pearson.r
    #  puts pearson.t
    #  puts pearson.probability
    #  puts pearson.summary
    #
    class Pearson
      include Statsample::Test
      include Summarizable
      # Name of correlation
      attr_accessor :name
      # Tails for probability (:both, :left or :right)
      attr_accessor :tails
      # Number of cases (size of the vectors after removing invalid pairs)
      attr_accessor :n
      # Creates the correlation between vectors +v1+ and +v2+.
      # Recognized +opts+ keys: :name (report label) and :tails
      # (:both, :left or :right); anything else is silently ignored.
      def initialize(v1, v2, opts=Hash.new)
        @v1_name, @v2_name = v1.name, v2.name
        # Keep only cases that are valid on both vectors.
        @v1, @v2 = Statsample.only_valid_clone(v1, v2)
        @n = @v1.size
        opts_default = {
          :name  => _("Correlation (%s - %s)") % [@v1_name, @v2_name],
          :tails => :both
        }
        # BUG FIX: was opts.merge(opts_default), which made the defaults
        # override any caller-supplied :name or :tails. Hash#merge gives
        # precedence to the argument, so defaults must be the receiver.
        @opts = opts_default.merge(opts)
        @opts.each { |k, v|
          self.send("#{k}=", v) if self.respond_to? k
        }
      end
      # Value of the Pearson correlation coefficient.
      def r
        Statsample::Bivariate.pearson(@v1, @v2)
      end
      # t statistic for testing r against 0 (n-2 degrees of freedom).
      def t
        Statsample::Bivariate.t_pearson(@v1, @v2)
      end
      # p-value for the t statistic, using the configured tails.
      def probability
        p_using_cdf(Distribution::T.cdf(t, @v1.size - 2), tails)
      end
      # Writes a one-line summary of the test into a ReportBuilder +builder+.
      def report_building(builder)
        builder.text(_("%s : r=%0.3f (t:%0.3f, g.l.=%d, p:%0.3f / %s tails)") % [@name, r, t, (n - 2), probability, tails])
      end
    end
  end
end
@@ -0,0 +1,182 @@
1
require 'yaml'

module Statsample
  # This module aids to code open questions
  # * Select one or more vectors of a dataset, to create a yaml file, on which each vector is a hash, whose keys and values are the vector's factors. If data have Statsample::SPLIT_TOKEN on a value, each value will be separated on two or more hash keys.
  # * Edit the yaml and replace the values of hashes with your codes. If you need to create two or more codes for an answer, use the separator (default Statsample::SPLIT_TOKEN)
  # * Recode the vectors, loading the yaml file:
  #   * recode_dataset_simple!() : The new vectors have the same name of the original plus "_recoded"
  #   * recode_dataset_split!() : Create equal number of vectors as values. See Vector.add_vectors_by_split() for arguments
  #
  # Usage:
  #   recode_file="recodification.yaml"
  #   phase=:first # flag
  #   if phase==:first
  #     File.open(recode_file,"w") {|fp|
  #       Statsample::Codification.create_yaml(ds, %w{vector1 vector2}, fp, ",")
  #     }
  #     # Edit the file recodification.yaml and verify changes
  #   elsif phase==:second
  #     File.open(recode_file,"r") {|fp|
  #       Statsample::Codification.verify(YAML.load(fp), ['vector1'])
  #     }
  #   # Add new vectors to the dataset
  #   elsif phase==:third
  #     File.open(recode_file,"r") {|fp|
  #       Statsample::Codification.recode_dataset_split!(ds, YAML.load(fp), "*")
  #     }
  #   end
  #
  module Codification
    class << self
      # Create a hash, based on vectors, to create the dictionary.
      # The keys will be vectors name on dataset and the values
      # will be hashes, with keys = values, for recodification
      def create_hash(dataset, vectors, sep=Statsample::SPLIT_TOKEN)
        raise ArgumentError, "Array shouldn't be empty" if vectors.size == 0
        pro_hash = vectors.inject({}) do |h, v_name|
          # Numeric names are kept as-is; everything else becomes a Symbol.
          v_name = v_name.is_a?(Numeric) ? v_name : v_name.to_sym
          raise Exception, "Vector #{v_name} doesn't exists on Dataset" if
            !dataset.vectors.include?(v_name)
          v = dataset[v_name]
          split_data = v.splitted(sep)
                        .flatten
                        .collect { |c| c.to_s }
                        .find_all { |c| !c.nil? }

          # Identity mapping value => value, ready to be hand-edited.
          factors = split_data.uniq
                              .compact
                              .sort
                              .inject({}) { |ac, val| ac[val] = val; ac }
          h[v_name] = factors
          h
        end

        pro_hash
      end

      # Create a yaml to create a dictionary, based on vectors
      # The keys will be vectors name on dataset and the values
      # will be hashes, with keys = values, for recodification
      #
      #   v1 = Daru::Vector.new(%w{a,b b,c d})
      #   ds = Daru::DataFrame.new({:v1 => v1})
      #   Statsample::Codification.create_yaml(ds,[:v1])
      #   => "--- \nv1: \n a: a\n b: b\n c: c\n d: d\n"
      def create_yaml(dataset, vectors, io=nil, sep=Statsample::SPLIT_TOKEN)
        pro_hash = create_hash(dataset, vectors, sep)
        YAML.dump(pro_hash, io)
      end

      # Create an excel file to create a dictionary, based on vectors.
      # Raises an error if filename exists.
      # The rows will be:
      # * field: name of vector
      # * original: original name
      # * recoded: new code
      def create_excel(dataset, vectors, filename, sep=Statsample::SPLIT_TOKEN)
        require 'spreadsheet'
        if File.exist?(filename)
          # BUG FIX: interpolation was lost ("#(unknown)") and the message
          # had a typo ("Delete ir"). Restore the filename and wording.
          raise "Exists a file named #{filename}. Delete it before overwrite."
        end
        book  = Spreadsheet::Workbook.new
        sheet = book.create_worksheet
        sheet.row(0).concat(%w(field original recoded))
        i = 1
        create_hash(dataset, vectors, sep).sort.each do |field, inner_hash|
          inner_hash.sort.each do |k, v|
            sheet.row(i).concat([field.to_s, k.to_s, v.to_s])
            i += 1
          end
        end

        book.write(filename)
      end

      # From an excel generates a dictionary hash
      # to use on recode_dataset_simple!() or recode_dataset_split!().
      def excel_to_recoded_hash(filename)
        require 'spreadsheet'
        h = {}
        book  = Spreadsheet.open filename
        sheet = book.worksheet 0
        row_i = 0
        sheet.each do |row|
          row_i += 1
          # Skip header row and any row with a missing cell.
          next if row_i == 1 or row[0].nil? or row[1].nil? or row[2].nil?
          key = row[0].to_sym
          h[key] ||= {}
          h[key][row[1]] = row[2]
        end
        h
      end

      # Invert {original => "code1<sep>code2"} into {code => [originals]}.
      def inverse_hash(h, sep=Statsample::SPLIT_TOKEN)
        h.inject({}) do |a, v|
          v[1].split(sep).each do |val|
            a[val] ||= []
            a[val].push(v[0])
          end
          a
        end
      end

      # Build {original => [codes]} splitting each value by +sep+.
      def dictionary(h, sep=Statsample::SPLIT_TOKEN)
        h.inject({}) { |a, v| a[v[0]] = v[1].split(sep); a }
      end

      # Recode one vector +v+ using dictionary hash +h+.
      # Returns an array with, per case, nil or the unique array of codes.
      def recode_vector(v, h, sep=Statsample::SPLIT_TOKEN)
        dict = dictionary(h, sep)
        new_data = v.splitted(sep)
        new_data.collect do |c|
          if c.nil?
            nil
          else
            c.collect { |value| dict[value] }.flatten.uniq
          end
        end
      end

      # Adds one "<name>_recoded" vector per key of +dictionary_hash+.
      def recode_dataset_simple!(dataset, dictionary_hash, sep=Statsample::SPLIT_TOKEN)
        _recode_dataset(dataset, dictionary_hash, sep, false)
      end

      # Adds one "<name>_<code>" vector per distinct code.
      def recode_dataset_split!(dataset, dictionary_hash, sep=Statsample::SPLIT_TOKEN)
        _recode_dataset(dataset, dictionary_hash, sep, true)
      end

      # Shared implementation of the two recode_dataset_* entry points.
      def _recode_dataset(dataset, h, sep=Statsample::SPLIT_TOKEN, split=false)
        v_names ||= h.keys
        v_names.each do |v_name|
          raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.vectors.include? v_name
          recoded = Daru::Vector.new(
            recode_vector(dataset[v_name], h[v_name], sep).collect do |c|
              if c.nil?
                nil
              else
                c.join(sep)
              end
            end
          )
          if split
            recoded.split_by_separator(sep).each { |k, v|
              dataset[(v_name.to_s + "_" + k).to_sym] = v
            }
          else
            dataset[(v_name.to_s + "_recoded").to_sym] = recoded
          end
        end
      end

      # Print a report of the codification hash +h+ to +io+,
      # listing, per field, each code with its usage count and originals,
      # ordered from most to least used.
      def verify(h, v_names=nil, sep=Statsample::SPLIT_TOKEN, io=$>)
        require 'pp'
        v_names ||= h.keys
        v_names.each { |v_name|
          inverse = inverse_hash(h[v_name], sep)
          io.puts "- Field: #{v_name}"
          inverse.sort { |a, b| -(a[1].count <=> b[1].count) }.each { |k, v|
            io.puts " - \"#{k}\" (#{v.count}) :\n -'" + v.join("\n -'") + "'"
          }
        }
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
# This module will be removed in the next release.
# Please shift to using Daru::DataFrame.from_csv and #write_csv for CSV
# related operations.
module Statsample
  class CSV
    # Deprecated reader: historically returned a DataFrame built from a
    # csv file; now always raises NoMethodError pointing at
    # Daru::DataFrame.from_csv.
    def self.read(filename, empty = [''], ignore_lines = 0, opts = {})
      raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_csv instead."
    end

    # Deprecated writer: historically saved a Dataset on a csv file; now
    # always raises NoMethodError pointing at Daru::DataFrame#write_csv.
    def self.write(dataset, filename, convert_comma = false, opts = {})
      raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_csv instead."
    end
  end
end
@@ -0,0 +1,48 @@
1
module Statsample
  module SPSS
    class << self
      # Export a SPSS MATRIX DATA block with tetrachoric correlations.
      # Returns the complete SPSS syntax as a String (N / MEAN / STDDEV
      # header rows followed by the lower triangle of CORR rows).
      #
      # Use:
      #  ds=Daru::DataFrame.from_excel("my_data.xls")
      #  puts Statsample::SPSS.tetrachoric_correlation_matrix(ds)
      def tetrachoric_correlation_matrix(ds)
        # Work on a copy with missing values removed.
        dsv=ds.reject_values(*Daru::MISSING_VALUES)
        # Delete all vectors doesn't have variation (a constant vector has
        # no tetrachoric correlation); every surviving vector is
        # dichotomized. NOTE(review): #factors and #dichotomize are
        # presumably the Daru extensions added by this gem's daru.rb —
        # confirm.
        dsv.vectors.each { |f|
          if dsv[f].factors.size==1
            dsv.delete_vector(f)
          else
            dsv[f]=dsv[f].dichotomize
          end
        }

        tcm=Statsample::Bivariate.tetrachoric_correlation_matrix(dsv)
        # Per-variable case counts, means and standard deviations,
        # pre-formatted for the MATRIX DATA header rows below.
        n=dsv.vectors.to_a.collect {|f|
          sprintf("%d",dsv[f].size)
        }
        meanlist=dsv.vectors.to_a.collect{|f|
          sprintf("%0.3f", dsv[f].mean)
        }
        stddevlist=dsv.vectors.to_a.collect{|f|
          sprintf("%0.3f", dsv[f].sd)
        }
        # NOTE(review): dsv.fields here vs dsv.vectors.to_a above — both
        # presumably yield the same column names; verify they agree.
        out=<<-HEREDOC
MATRIX DATA VARIABLES=ROWTYPE_ #{dsv.fields.join(",")}.
BEGIN DATA
N #{n.join(" ")}
MEAN #{meanlist.join(" ")}
STDDEV #{stddevlist.join(" ")}
        HEREDOC
        # Append the lower triangle of the correlation matrix, one CORR
        # row per variable (row i carries columns 0..i).
        tcm.row_size.times {|i|
          out +="CORR "
          (i+1).times {|j|
            out+=sprintf("%0.3f",tcm[i,j])+" "
          }
          out +="\n"
        }
        # Final value of the method: the full syntax string.
        out+="END DATA.\nEXECUTE.\n"
      end
    end
  end
end
@@ -0,0 +1,211 @@
1
+ require 'statsample/converter/spss'
2
+ module Statsample
3
# Create and dumps Datasets on a database
#
# == NOTE
#
# Deprecated. Use Daru::DataFrame.from_sql and Daru::DataFrame#write_sql
module Database
  # Deprecated: historically read a database query into a Dataset.
  # Always raises NoMethodError pointing at Daru::DataFrame.from_sql.
  def self.read(dbh, query)
    raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_sql instead."
  end

  # Deprecated: historically inserted each case of the Dataset on the
  # selected table. Always raises NoMethodError.
  def self.insert(ds, dbh, table)
    raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_sql instead."
  end

  # Deprecated: historically created a SQL statement based on a given
  # Dataset. Always raises NoMethodError.
  def self.create_sql(ds, table, charset = "UTF8")
    raise NoMethodError, "Deprecated. Use Daru::DataFrame#create_sql instead."
  end
end
37
# Exports a dataset as a tab-separated file for Mondrian.
module Mondrian
  class << self
    # Writes +dataset+ to +filename+: a header line with the vector
    # names, then one tab-joined line per row. nil cells become "NA";
    # any whitespace inside a cell is collapsed to "_" (Mondrian treats
    # whitespace as a separator).
    def write(dataset, filename)
      File.open(filename, "wb") do |out|
        out.puts dataset.vectors.to_a.join("\t")
        dataset.each_row do |record|
          cells = record.map { |cell| cell.nil? ? "NA" : cell.to_s.gsub(/\s+/, "_") }
          out.puts cells.join("\t")
        end
      end
    end
  end
end
50
+
51
# Deprecated plain-text reader shell kept only for API compatibility.
class PlainText
  # Always raises NoMethodError pointing at the Daru replacement.
  def self.read(filename, fields)
    raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_plaintext instead."
  end
end
58
+
59
# This class has been DEPRECATED. Use Daru::DataFrame::from_excel
# Daru::DataFrame#write_excel for XLS file operations.
class Excel
  # Deprecated: historically wrote an Excel spreadsheet based on a
  # dataset. Always raises NoMethodError pointing at
  # Daru::DataFrame#write_excel.
  def self.write(dataset, filename)
    raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_excel instead."
  end

  # Deprecated: historically returned a dataset based on a xls file.
  # Always raises NoMethodError pointing at Daru::DataFrame.from_excel.
  def self.read(filename, opts = Hash.new)
    raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_excel instead."
  end
end
83
+
84
# Exports a dataset as an Mx (matrix algebra / SEM package) script.
module Mx
  class << self
    # Writes +dataset+ to +filename+ in Mx format.
    # +type+ selects the data section:
    # * :raw        — a Rectangular block with one tab-joined line per
    #                 case; invalid cells become "."
    # * :covariance — a CMatrix Full block with the covariance matrix
    #                 formatted to three decimals (default)
    def write(dataset, filename, type=:covariance)
      puts "Writing MX File"
      File.open(filename,"w") do |fp|
        # BUG FIX: the header line had lost its interpolation and was
        # emitted literally as "! #(unknown)"; restore the filename.
        fp.puts "! #{filename}"
        fp.puts "! Output generated by Statsample"
        fp.puts "Data Ninput=#{dataset.fields.size} Nobservations=#{dataset.cases}"
        fp.puts "Labels " + dataset.vectors.to_a.join(" ")
        case type
        when :raw
          fp.puts "Rectangular"
          dataset.each do |row|
            out = dataset.vectors.to_a.collect do |f|
              # "." is Mx's missing-data marker.
              if dataset[f].is_valid? row[f]
                row[f]
              else
                "."
              end
            end
            fp.puts out.join("\t")
          end
          fp.puts "End Rectangular"
        when :covariance
          fp.puts " CMatrix Full"
          cm = Statsample::Bivariate.covariance_matrix(dataset)
          d = (0...(cm.row_size)).collect { |row|
            (0...(cm.column_size)).collect { |col|
              cm[row, col].nil? ? "." : sprintf("%0.3f", cm[row, col])
            }.join(" ")
          }.join("\n")
          fp.puts d
        end
      end
    end
  end
end
121
# Exports a dataset as a GGobi XML data file.
module GGobi
  class << self
    # Writes the GGobi XML for +dataset+ to +filename+.
    # +opt+ is forwarded to #out (see below).
    def write(dataset,filename,opt={})
      File.open(filename,"w") {|fp|
        fp.write(self.out(dataset,opt))
      }
    end
    # Builds and returns the GGobi XML document as a String.
    # Recognized +opt+ keys: :dataname, :description, :missing
    # (defaults "Default", "", "NA").
    def out(dataset,opt={})
      require 'ostruct'
      default_opt = {:dataname => "Default", :description=>"", :missing=>"NA"}
      default_opt.merge! opt
      # The carrier accumulates, across variable_definition calls, which
      # vectors are categorical and the value=>level-number conversions
      # needed when emitting the records below.
      carrier=OpenStruct.new
      carrier.categorials=[]
      carrier.conversions={}
      variables_def=dataset.vectors.to_a.collect{|k|
        variable_definition(carrier,dataset[k],k)
      }.join("\n")

      # Map column position => categorical vector name, so each record
      # can translate raw values into the level numbers declared above.
      indexes=carrier.categorials.inject({}) {|s,c|
        s[dataset.vectors.to_a.index(c)]=c
        s
      }
      records=""
      dataset.each_row {|c|
        # NOTE(review): mutates the row in place before formatting —
        # presumably safe because each_row yields a fresh object; confirm.
        indexes.each { |ik,iv|
          c[ik] = carrier.conversions[iv][c[ik]]
        }
        records << "<record>#{values_definition(c, default_opt[:missing])}</record>\n"
      }

      out=<<EOC
<?xml version="1.0"?>
<!DOCTYPE ggobidata SYSTEM "ggobi.dtd">
<ggobidata count="1">
<data name="#{default_opt[:dataname]}">
<description>#{default_opt[:description]}</description>
<variables count="#{dataset.fields.size}">
#{variables_def}
</variables>
<records count="#{dataset.cases}" missingValue="#{default_opt[:missing]}">
#{records}
</records>

</data>
</ggobidata>
EOC

      out

    end
    # Formats one record's cells as a space-joined string: nil becomes
    # the +missing+ marker, numbers are emitted as-is, and whitespace in
    # strings is collapsed to "_".
    def values_definition(c,missing)
      c.collect{|v|
        if v.nil?
          "#{missing}"
        elsif v.is_a? Numeric
          "#{v}"
        else
          "#{v.gsub(/\s+/,"_")}"
        end
      }.join(" ")
    end
    # Outputs a string for a variable definition
    # carrier = shared OpenStruct collecting categorical names/conversions
    # v = vector
    # name = name of the variable
    # nickname = nickname
    # Object vectors (or any vector containing a String) become
    # <categoricalvariable> with numbered levels; vectors containing a
    # Float become <realvariable>; everything else <integervariable>.
    def variable_definition(carrier,v,name,nickname=nil)
      nickname = (nickname.nil? ? "" : "nickname=\"#{nickname}\"" )
      if v.type==:object or v.to_a.find {|d| d.is_a? String }
        carrier.categorials.push(name)
        carrier.conversions[name]={}
        factors=v.factors
        out ="<categoricalvariable name=\"#{name}\" #{nickname}>\n"
        out << "<levels count=\"#{factors.size}\">\n"
        out << (1..factors.size).to_a.collect{|i|
          # Side effect: record value => level number for the records pass.
          carrier.conversions[name][factors[i-1]]=i
          "<level value=\"#{i}\">#{(v.labels[factors[i-1]] || factors[i-1])}</level>"
        }.join("\n")
        out << "</levels>\n</categoricalvariable>\n"
        out
      elsif v.to_a.find {|d| d.is_a? Float}
        "<realvariable name=\"#{name}\" #{nickname} />"
      else
        "<integervariable name=\"#{name}\" #{nickname} />"
      end
    end
  end
end
208
+ end
209
+
210
+ require 'statsample/converter/csv.rb'
211
+