statsample 1.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. checksums.yaml +4 -4
  2. data/.build.sh +15 -0
  3. data/.gitignore +1 -0
  4. data/.travis.yml +19 -7
  5. data/CONTRIBUTING.md +33 -0
  6. data/History.txt +5 -0
  7. data/README.md +41 -53
  8. data/benchmarks/correlation_matrix_15_variables.rb +6 -5
  9. data/benchmarks/correlation_matrix_5_variables.rb +6 -5
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
  11. data/examples/boxplot.rb +17 -5
  12. data/examples/correlation_matrix.rb +36 -7
  13. data/examples/dataset.rb +25 -5
  14. data/examples/dominance_analysis.rb +8 -7
  15. data/examples/dominance_analysis_bootstrap.rb +16 -11
  16. data/examples/histogram.rb +16 -2
  17. data/examples/icc.rb +5 -6
  18. data/examples/levene.rb +17 -3
  19. data/examples/multiple_regression.rb +6 -3
  20. data/examples/parallel_analysis.rb +11 -6
  21. data/examples/polychoric.rb +26 -13
  22. data/examples/principal_axis.rb +8 -4
  23. data/examples/reliability.rb +10 -10
  24. data/examples/scatterplot.rb +8 -0
  25. data/examples/t_test.rb +7 -0
  26. data/examples/u_test.rb +10 -2
  27. data/examples/vector.rb +9 -6
  28. data/examples/velicer_map_test.rb +12 -8
  29. data/lib/statsample.rb +13 -47
  30. data/lib/statsample/analysis/suite.rb +1 -1
  31. data/lib/statsample/anova/oneway.rb +6 -6
  32. data/lib/statsample/anova/twoway.rb +26 -24
  33. data/lib/statsample/bivariate.rb +78 -61
  34. data/lib/statsample/bivariate/pearson.rb +2 -2
  35. data/lib/statsample/codification.rb +45 -32
  36. data/lib/statsample/converter/csv.rb +15 -53
  37. data/lib/statsample/converter/spss.rb +6 -5
  38. data/lib/statsample/converters.rb +50 -211
  39. data/lib/statsample/crosstab.rb +26 -25
  40. data/lib/statsample/daru.rb +117 -0
  41. data/lib/statsample/dataset.rb +70 -942
  42. data/lib/statsample/dominanceanalysis.rb +16 -17
  43. data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
  44. data/lib/statsample/factor/parallelanalysis.rb +17 -19
  45. data/lib/statsample/factor/pca.rb +21 -20
  46. data/lib/statsample/factor/principalaxis.rb +3 -3
  47. data/lib/statsample/graph/boxplot.rb +8 -16
  48. data/lib/statsample/graph/histogram.rb +4 -4
  49. data/lib/statsample/graph/scatterplot.rb +8 -7
  50. data/lib/statsample/histogram.rb +128 -119
  51. data/lib/statsample/matrix.rb +20 -16
  52. data/lib/statsample/multiset.rb +39 -38
  53. data/lib/statsample/regression.rb +3 -3
  54. data/lib/statsample/regression/multiple.rb +8 -10
  55. data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
  56. data/lib/statsample/regression/multiple/baseengine.rb +32 -32
  57. data/lib/statsample/regression/multiple/gslengine.rb +33 -36
  58. data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
  59. data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
  60. data/lib/statsample/reliability.rb +23 -25
  61. data/lib/statsample/reliability/icc.rb +8 -7
  62. data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
  63. data/lib/statsample/reliability/scaleanalysis.rb +58 -60
  64. data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
  65. data/lib/statsample/resample.rb +1 -1
  66. data/lib/statsample/shorthand.rb +29 -25
  67. data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
  68. data/lib/statsample/test/levene.rb +28 -27
  69. data/lib/statsample/test/t.rb +7 -9
  70. data/lib/statsample/test/umannwhitney.rb +28 -28
  71. data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
  72. data/lib/statsample/vector.rb +70 -1013
  73. data/lib/statsample/version.rb +1 -1
  74. data/statsample.gemspec +12 -16
  75. data/test/helpers_tests.rb +1 -1
  76. data/test/test_analysis.rb +17 -17
  77. data/test/test_anova_contrast.rb +6 -6
  78. data/test/test_anovatwowaywithdataset.rb +8 -8
  79. data/test/test_anovawithvectors.rb +8 -8
  80. data/test/test_awesome_print_bug.rb +1 -1
  81. data/test/test_bartlettsphericity.rb +4 -4
  82. data/test/test_bivariate.rb +48 -43
  83. data/test/test_codification.rb +33 -33
  84. data/test/test_crosstab.rb +9 -9
  85. data/test/test_dataset.rb +28 -458
  86. data/test/test_factor.rb +46 -38
  87. data/test/test_factor_pa.rb +22 -13
  88. data/test/test_ggobi.rb +4 -4
  89. data/test/test_gsl.rb +4 -4
  90. data/test/test_histogram.rb +3 -3
  91. data/test/test_matrix.rb +13 -13
  92. data/test/test_multiset.rb +103 -91
  93. data/test/test_regression.rb +57 -52
  94. data/test/test_reliability.rb +55 -45
  95. data/test/test_reliability_icc.rb +8 -8
  96. data/test/test_reliability_skillscale.rb +26 -24
  97. data/test/test_resample.rb +1 -1
  98. data/test/test_statistics.rb +3 -13
  99. data/test/test_stest.rb +9 -9
  100. data/test/test_stratified.rb +3 -3
  101. data/test/test_test_t.rb +12 -12
  102. data/test/test_umannwhitney.rb +2 -2
  103. data/test/test_vector.rb +76 -613
  104. data/test/test_wilcoxonsignedrank.rb +4 -4
  105. metadata +57 -28
  106. data/lib/statsample/rserve_extension.rb +0 -20
  107. data/lib/statsample/vector/gsl.rb +0 -106
  108. data/test/fixtures/repeated_fields.csv +0 -7
  109. data/test/fixtures/scientific_notation.csv +0 -4
  110. data/test/fixtures/test_csv.csv +0 -7
  111. data/test/fixtures/test_xls.xls +0 -0
  112. data/test/test_csv.rb +0 -63
  113. data/test/test_rserve_extension.rb +0 -42
  114. data/test/test_xls.rb +0 -52
data/lib/statsample/bivariate/pearson.rb
@@ -7,8 +7,8 @@ module Statsample
   # variables.
   #
   # == Usage
-  #   a = [1,2,3,4,5,6].to_numeric
-  #   b = [2,3,4,5,6,7].to_numeric
+  #   a = Daru::Vector.new([1,2,3,4,5,6])
+  #   b = Daru::Vector.new([2,3,4,5,6,7])
   #   pearson = Statsample::Bivariate::Pearson.new(a,b)
   #   puts pearson.r
   #   puts pearson.t
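
Run under 2.0.0, the rewritten usage comment works as a self-contained script. A minimal sketch (assuming statsample 2.0 and daru are installed; note that for these two perfectly collinear vectors r is exactly 1.0, so t degenerates to Infinity):

    require 'statsample'

    a = Daru::Vector.new([1,2,3,4,5,6])
    b = Daru::Vector.new([2,3,4,5,6,7])
    pearson = Statsample::Bivariate::Pearson.new(a,b)
    puts pearson.r   # => 1.0, since b is just a shifted by 1
    puts pearson.t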
data/lib/statsample/codification.rb
@@ -34,24 +34,33 @@ module Statsample
     # will be hashes, with keys = values, for recodification
     def create_hash(dataset, vectors, sep=Statsample::SPLIT_TOKEN)
       raise ArgumentError,"Array should't be empty" if vectors.size==0
-      pro_hash=vectors.inject({}){|h,v_name|
-        raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
-        v=dataset[v_name]
-        split_data=v.splitted(sep).flatten.collect {|c| c.to_s}.find_all {|c| !c.nil?}
+      pro_hash = vectors.inject({}) do |h,v_name|
+        v_name = v_name.is_a?(Numeric) ? v_name : v_name.to_sym
+        raise Exception, "Vector #{v_name} doesn't exists on Dataset" if
+          !dataset.vectors.include?(v_name)
+        v = dataset[v_name]
+        split_data = v.splitted(sep)
+                      .flatten
+                      .collect { |c| c.to_s }
+                      .find_all{ |c| !c.nil? }
 
-        factors=split_data.uniq.compact.sort.inject({}) {|ac,val| ac[val]=val;ac }
-        h[v_name]=factors
+        factors = split_data.uniq
+                            .compact
+                            .sort
+                            .inject({}) { |ac,val| ac[val] = val; ac }
+        h[v_name] = factors
         h
-      }
+      end
+
       pro_hash
     end
     # Create a yaml to create a dictionary, based on vectors
     # The keys will be vectors name on dataset and the values
     # will be hashes, with keys = values, for recodification
     #
-    #   v1=%w{a,b b,c d}.to_vector
-    #   ds={"v1"=>v1}.to_dataset
-    #   Statsample::Codification.create_yaml(ds,['v1'])
+    #   v1 = Daru::Vector.new(%w{a,b b,c d})
+    #   ds = Daru::DataFrame.new({:v1 => v1})
+    #   Statsample::Codification.create_yaml(ds,[:v1])
     #   => "--- \nv1: \n  a: a\n  b: b\n  c: c\n  d: d\n"
     def create_yaml(dataset, vectors, io=nil, sep=Statsample::SPLIT_TOKEN)
       pro_hash=create_hash(dataset, vectors, sep)
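
The updated doc comment doubles as a smoke test. A sketch of the same call, assuming statsample 2.0 with daru loaded:

    require 'statsample'

    v1 = Daru::Vector.new(%w{a,b b,c d})
    ds = Daru::DataFrame.new(:v1 => v1)
    # Cells are split on the default separator (","), and the unique
    # values become an identity recoding dictionary, dumped as YAML:
    puts Statsample::Codification.create_yaml(ds, [:v1])
    # => "--- \nv1: \n  a: a\n  b: b\n  c: c\n  d: d\n"

Note that vector names are now symbols (:v1), matching daru's convention.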
@@ -69,16 +78,17 @@ module Statsample
       if File.exist?(filename)
         raise "Exists a file named #{filename}. Delete ir before overwrite."
       end
-      book = Spreadsheet::Workbook.new
+      book  = Spreadsheet::Workbook.new
       sheet = book.create_worksheet
-      sheet.row(0).concat(%w{field original recoded})
-      i=1
+      sheet.row(0).concat(%w(field original recoded))
+      i = 1
       create_hash(dataset, vectors, sep).sort.each do |field, inner_hash|
         inner_hash.sort.each do |k,v|
-          sheet.row(i).concat([field.dup,k.dup,v.dup])
-          i+=1
+          sheet.row(i).concat([field.to_s,k.to_s,v.to_s])
+          i += 1
         end
       end
+
       book.write(filename)
     end
     # From a excel generates a dictionary hash
@@ -91,10 +101,11 @@ module Statsample
       sheet= book.worksheet 0
       row_i=0
       sheet.each do |row|
-        row_i+=1
-        next if row_i==1 or row[0].nil? or row[1].nil? or row[2].nil?
-        h[row[0]]={} if h[row[0]].nil?
-        h[row[0]][row[1]]=row[2]
+        row_i += 1
+        next if row_i == 1 or row[0].nil? or row[1].nil? or row[2].nil?
+        key = row[0].to_sym
+        h[key] ||= {}
+        h[key][row[1]] = row[2]
       end
       h
     end
@@ -110,12 +121,12 @@ module Statsample
     end
 
     def dictionary(h, sep=Statsample::SPLIT_TOKEN)
-      h.inject({}) {|a,v| a[v[0]]=v[1].split(sep); a }
+      h.inject({}) { |a,v| a[v[0]]=v[1].split(sep); a }
     end
 
     def recode_vector(v,h,sep=Statsample::SPLIT_TOKEN)
-      dict=dictionary(h,sep)
-      new_data=v.splitted(sep)
+      dict = dictionary(h,sep)
+      new_data = v.splitted(sep)
       new_data.collect do |c|
         if c.nil?
           nil
@@ -134,20 +145,22 @@ module Statsample
     def _recode_dataset(dataset, h , sep=Statsample::SPLIT_TOKEN, split=false)
       v_names||=h.keys
       v_names.each do |v_name|
-        raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
-        recoded=recode_vector(dataset[v_name], h[v_name],sep).collect { |c|
-          if c.nil?
-            nil
-          else
-            c.join(sep)
+        raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.vectors.include? v_name
+        recoded = Daru::Vector.new(
+          recode_vector(dataset[v_name], h[v_name],sep).collect do |c|
+            if c.nil?
+              nil
+            else
+              c.join(sep)
+            end
           end
-        }.to_vector
-        if(split)
+        )
+        if split
          recoded.split_by_separator(sep).each {|k,v|
-            dataset[v_name+"_"+k]=v
+            dataset[(v_name.to_s + "_" + k).to_sym] = v
          }
        else
-          dataset[v_name+"_recoded"]=recoded
+          dataset[(v_name.to_s + "_recoded").to_sym] = recoded
        end
      end
    end
data/lib/statsample/converter/csv.rb
@@ -1,65 +1,27 @@
-require 'csv'
-
+# This module will be removed in the next release.
+# Please shift to using Daru::DataFrame.from_csv and #write_csv for CSV
+# related operations.
 module Statsample
-  class CSV < SpreadsheetBase
-    # Default options for processing CSV files. Accept the same options as
-    # Ruby's `CSV#new`.
-    DEFAULT_OPTIONS = {
-      converters: [:numeric]
-    }
-
+  class CSV
     class << self
-      # Return a Dataset created from a csv file.
+      # Return a DataFrame created from a csv file.
       #
-      # USE:
-      #    ds = Statsample::CSV.read('test_csv.csv')
+      # == NOTE
+      #
+      # This method has been DEPRECATED in favour of Daru::DataFrame.from_csv.
+      # Please switch to using that.
       def read(filename, empty = [''], ignore_lines = 0, opts = {})
-        first_row = true
-        fields = []
-        ds = nil
-        line_number = 0
-        options = DEFAULT_OPTIONS.merge(opts)
-
-        csv = ::CSV.open(filename, 'rb', options)
-
-        csv.each do |row|
-          line_number += 1
-
-          if (line_number <= ignore_lines)
-            next
-          end
-
-          if first_row
-            fields = extract_fields(row)
-            ds = Statsample::Dataset.new(fields)
-            first_row = false
-          else
-            rowa = process_row(row, empty)
-            ds.add_case(rowa, false)
-          end
-        end
-
-        convert_to_numeric_and_date(ds, fields)
-        ds.update_valid_data
-        ds
+        raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_csv instead."
      end
 
      # Save a Dataset on a csv file.
      #
-      # USE:
-      #    Statsample::CSV.write(ds, 'test_csv.csv')
+      # == NOTE
+      #
+      # This method has been DEPRECATED in favor of Daru::DataFrame#write_csv.
+      # Please use that instead.
      def write(dataset, filename, convert_comma = false, opts = {})
-        options = DEFAULT_OPTIONS.merge(opts)
-
-        writer = ::CSV.open(filename, 'w', options)
-        writer << dataset.fields
-
-        dataset.each_array do |row|
-          row.collect! { |v| v.to_s.gsub('.', ',') } if convert_comma
-          writer << row
-        end
-
-        writer.close
+        raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_csv instead."
      end
    end
  end
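
For callers, the migration named in the deprecation messages is mechanical. A sketch, assuming a CSV file with a header row:

    require 'daru'

    # 1.5.0: ds = Statsample::CSV.read('test_csv.csv')
    ds = Daru::DataFrame.from_csv('test_csv.csv')

    # 1.5.0: Statsample::CSV.write(ds, 'out.csv')
    ds.write_csv('out.csv')

Option handling moves to daru too: Daru::DataFrame.from_csv forwards its options hash to Ruby's CSV, much as DEFAULT_OPTIONS did here.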
data/lib/statsample/converter/spss.rb
@@ -4,26 +4,27 @@ module Statsample
   # Export a SPSS Matrix with tetrachoric correlations .
   #
   # Use:
-  #   ds=Statsample::Excel.read("my_data.xls")
+  #   ds=Daru::DataFrame.from_excel("my_data.xls")
   #   puts Statsample::SPSS.tetrachoric_correlation_matrix(ds)
   def tetrachoric_correlation_matrix(ds)
     dsv=ds.dup_only_valid
     # Delete all vectors doesn't have variation
-    dsv.fields.each{|f|
+    dsv.vectors.each { |f|
       if dsv[f].factors.size==1
         dsv.delete_vector(f)
       else
         dsv[f]=dsv[f].dichotomize
       end
     }
+
     tcm=Statsample::Bivariate.tetrachoric_correlation_matrix(dsv)
-    n=dsv.fields.collect {|f|
+    n=dsv.vectors.to_a.collect {|f|
       sprintf("%d",dsv[f].size)
     }
-    meanlist=dsv.fields.collect{|f|
+    meanlist=dsv.vectors.to_a.collect{|f|
       sprintf("%0.3f", dsv[f].mean)
     }
-    stddevlist=dsv.fields.collect{|f|
+    stddevlist=dsv.vectors.to_a.collect{|f|
       sprintf("%0.3f", dsv[f].sd)
     }
     out=<<-HEREDOC
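
The rewritten doc comment is the whole usage story. A sketch, with "my_data.xls" standing in for any spreadsheet whose columns can be dichotomized:

    require 'statsample'

    ds = Daru::DataFrame.from_excel("my_data.xls")
    # Constant columns are dropped, the rest are dichotomized, and an
    # SPSS matrix program with tetrachoric correlations is printed:
    puts Statsample::SPSS.tetrachoric_correlation_matrix(ds)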
data/lib/statsample/converters.rb
@@ -1,63 +1,36 @@
 require 'statsample/converter/spss'
 module Statsample
-  # Create and dumps Datasets on a database
+  # Create and dumps Datasets on a database
+  #
+  # == NOTE
+  #
+  # Deprecated. Use Daru::DataFrame.from_sql and Daru::DataFrame#write_sql
   module Database
     class << self
       # Read a database query and returns a Dataset
       #
-      # USE:
-      #
-      #  dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
-      #  Statsample.read(dbh, "SELECT * FROM test")
-      #
+      # == NOTE
+      #
+      # Deprecated. Use Daru::DataFrame.from_sql instead.
       def read(dbh,query)
-        require 'dbi'
-        sth=dbh.execute(query)
-        vectors={}
-        fields=[]
-        sth.column_info.each {|c|
-          vectors[c['name']]=Statsample::Vector.new([])
-          vectors[c['name']].name=c['name']
-          vectors[c['name']].type= (c['type_name']=='INTEGER' or c['type_name']=='DOUBLE') ? :numeric : :object
-          fields.push(c['name'])
-        }
-        ds=Statsample::Dataset.new(vectors,fields)
-        sth.fetch do |row|
-          ds.add_case(row.to_a, false )
-        end
-        ds.update_valid_data
-        ds
+        raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_sql instead."
      end
+
      # Insert each case of the Dataset on the selected table
      #
-      # USE:
-      #
-      #  ds={'id'=>[1,2,3].to_vector, 'name'=>["a","b","c"].to_vector}.to_dataset
-      #  dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
-      #  Statsample::Database.insert(ds,dbh,"test")
-      #
+      # == NOTE
+      #
+      # Deprecated. Use Daru::DataFrame#write_sql instead
      def insert(ds, dbh, table)
-        require 'dbi'
-        query="INSERT INTO #{table} ("+ds.fields.join(",")+") VALUES ("+((["?"]*ds.fields.size).join(","))+")"
-        sth=dbh.prepare(query)
-        ds.each_array{|c| sth.execute(*c) }
-        return true
+        raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_sql instead."
      end
      # Create a sql, basen on a given Dataset
      #
-      # USE:
-      #
-      #  ds={'id'=>[1,2,3,4,5].to_vector,'name'=>%w{Alex Peter Susan Mary John}.to_vector}.to_dataset
-      #  Statsample::Database.create_sql(ds,'names')
-      #  ==>"CREATE TABLE names (id INTEGER,\n name VARCHAR (255)) CHARACTER SET=UTF8;"
-      #
+      # == NOTE
+      #
+      # Deprecated. Use Daru::DataFrame#create_sql instead.
      def create_sql(ds,table,charset="UTF8")
-        sql="CREATE TABLE #{table} ("
-        fields=ds.fields.collect{|f|
-          v=ds[f]
-          f+" "+v.db_type
-        }
-        sql+fields.join(",\n ")+") CHARACTER SET=#{charset};"
+        raise NoMethodError, "Deprecated. Use Daru::DataFrame#create_sql instead."
      end
    end
  end
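
The same migration pattern applies here. A sketch, where dbh stands for an already-open database handle (daru's SQL loaders are documented to accept a DBI or ActiveRecord connection, but verify against the daru version in use):

    require 'daru'

    # 1.5.0: ds = Statsample::Database.read(dbh, "SELECT * FROM test")
    ds = Daru::DataFrame.from_sql(dbh, "SELECT * FROM test")

    # 1.5.0: Statsample::Database.insert(ds, dbh, "test")
    ds.write_sql(dbh, "test")

For create_sql the deprecation message points at Daru::DataFrame#create_sql; check that it exists in your daru release before relying on it.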
@@ -65,182 +38,49 @@ module Statsample
     class << self
       def write(dataset,filename)
         File.open(filename,"wb") do |fp|
-          fp.puts dataset.fields.join("\t")
-          dataset.each_array_with_nils do |row|
-            row2=row.collect{|v| v.nil? ? "NA" : v.to_s.gsub(/\s+/,"_") }
+          fp.puts dataset.vectors.to_a.join("\t")
+          dataset.each_row do |row|
+            row2 = row.map { |v| v.nil? ? "NA" : v.to_s.gsub(/\s+/,"_") }
             fp.puts row2.join("\t")
           end
         end
       end
     end
   end
-  class SpreadsheetBase
-    class << self
-      def extract_fields(row)
-        i=0;
-        fields=row.to_a.collect{|c|
-          if c.nil?
-            i+=1
-            "var%05d" % i
-          else
-            c.to_s.downcase
-          end
-        }
-        fields.recode_repeated
-      end
 
-      def process_row(row,empty)
-        row.to_a.map do |c|
-          if empty.include?(c)
-            nil
-          else
-            if c.is_a? String and c.is_number?
-              if c=~/^\d+$/
-                c.to_i
-              else
-                c.gsub(",",".").to_f
-              end
-            else
-              c
-            end
-          end
-        end
-      end
-      def convert_to_numeric_and_date(ds,fields)
-        fields.each do |f|
-          if ds[f].can_be_numeric?
-            ds[f].type=:numeric
-          elsif ds[f].can_be_date?
-            ds[f].type=:date
-          end
-        end
+  class PlainText
+    class << self
+      def read(filename, fields)
+        raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_plaintext instead."
       end
-
     end
   end
-  class PlainText < SpreadsheetBase
-    class << self
-      def read(filename, fields)
-        ds=Statsample::Dataset.new(fields)
-        fp=File.open(filename,"r")
-        fp.each_line do |line|
-          row=process_row(line.strip.split(/\s+/),[""])
-          next if row==["\x1A"]
-          ds.add_case_array(row)
-        end
-        convert_to_numeric_and_date(ds,fields)
-        ds.update_valid_data
-        fields.each {|f|
-          ds[f].name=f
-        }
-        ds
-      end
-    end
-  end
-  class Excel < SpreadsheetBase
+
+  # This class has been DEPRECATED. Use Daru::DataFrame::from_excel and
+  # Daru::DataFrame#write_excel for XLS file operations.
+  class Excel
     class << self
       # Write a Excel spreadsheet based on a dataset
       # * TODO: Format nicely date values
+      #
+      # == NOTE
+      #
+      # Deprecated. Use Daru::DataFrame#write_excel.
       def write(dataset,filename)
-        require 'spreadsheet'
-        book = Spreadsheet::Workbook.new
-        sheet = book.create_worksheet
-        format = Spreadsheet::Format.new :color => :blue,
-          :weight => :bold
-        sheet.row(0).concat(dataset.fields.map {|i| i.dup}) # Unfreeze strings
-        sheet.row(0).default_format = format
-        i=1
-        dataset.each_array{|row|
-          sheet.row(i).concat(row)
-          i+=1
-        }
-        book.write(filename)
-      end
-      # This should be fixed.
-      # If we have a Formula, should be resolver first
-
-      def preprocess_row(row, dates)
-        i=-1
-        row.collect!{|c|
-          i+=1
-          if c.is_a? Spreadsheet::Formula
-            if(c.value.is_a? Spreadsheet::Excel::Error)
-              nil
-            else
-              c.value
-            end
-          elsif dates.include? i and !c.nil? and c.is_a? Numeric
-            row.date(i)
-          else
-            c
-          end
-        }
+        raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_excel instead."
      end
-      private :process_row, :preprocess_row
 
      # Returns a dataset based on a xls file
-      # USE:
-      #    ds = Statsample::Excel.read("test.xls")
-      #
+      #
+      # == NOTE
+      #
+      # Deprecated. Use Daru::DataFrame.from_excel instead.
      def read(filename, opts=Hash.new)
-        require 'spreadsheet'
-        raise "options should be Hash" unless opts.is_a? Hash
-        opts_default={
-          :worksheet_id=>0,
-          :ignore_lines=>0,
-          :empty=>['']
-        }
-
-        opts=opts_default.merge opts
-
-        worksheet_id=opts[:worksheet_id]
-        ignore_lines=opts[:ignore_lines]
-        empty=opts[:empty]
-
-        first_row=true
-        fields=[]
-        ds=nil
-        line_number=0
-        book = Spreadsheet.open filename
-        sheet= book.worksheet worksheet_id
-        sheet.each do |row|
-          begin
-            dates=[]
-            row.formats.each_index{|i|
-              if !row.formats[i].nil? and row.formats[i].number_format=="DD/MM/YYYY"
-                dates.push(i)
-              end
-            }
-            line_number+=1
-            next if(line_number<=ignore_lines)
-
-            preprocess_row(row,dates)
-            if first_row
-              fields=extract_fields(row)
-              ds=Statsample::Dataset.new(fields)
-              first_row=false
-            else
-              rowa=process_row(row,empty)
-              (fields.size - rowa.size).times {
-                rowa << nil
-              }
-              ds.add_case(rowa,false)
-            end
-          rescue => e
-            error="#{e.to_s}\nError on Line # #{line_number}:#{row.join(",")}"
-            raise
-          end
-        end
-        convert_to_numeric_and_date(ds, fields)
-        ds.update_valid_data
-        fields.each {|f|
-          ds[f].name=f
-        }
-        ds.name=filename
-        ds
+        raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_excel instead."
      end
    end
  end
+
  module Mx
    class << self
      def write(dataset,filename,type=:covariance)
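
Excel follows the same pattern as CSV. A sketch (daru's XLS support rides on the spreadsheet gem, which must be installed separately):

    require 'daru'

    # 1.5.0: ds = Statsample::Excel.read('test.xls')
    ds = Daru::DataFrame.from_excel('test.xls')

    # 1.5.0: Statsample::Excel.write(ds, 'out.xls')
    ds.write_excel('out.xls')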
@@ -249,12 +89,12 @@ module Statsample
           fp.puts "! #{filename}"
           fp.puts "! Output generated by Statsample"
           fp.puts "Data Ninput=#{dataset.fields.size} Nobservations=#{dataset.cases}"
-          fp.puts "Labels "+dataset.fields.join(" ")
+          fp.puts "Labels " + dataset.vectors.to_a.join(" ")
           case type
           when :raw
             fp.puts "Rectangular"
             dataset.each do |row|
-              out=dataset.fields.collect do |f|
+              out=dataset.vectors.to_a.collect do |f|
                 if dataset[f].is_valid? row[f]
                   row[f]
                 else
@@ -292,18 +132,18 @@ module Statsample
       carrier=OpenStruct.new
       carrier.categorials=[]
       carrier.conversions={}
-      variables_def=dataset.fields.collect{|k|
+      variables_def=dataset.vectors.to_a.collect{|k|
         variable_definition(carrier,dataset[k],k)
       }.join("\n")
 
       indexes=carrier.categorials.inject({}) {|s,c|
-        s[dataset.fields.index(c)]=c
+        s[dataset.vectors.to_a.index(c)]=c
         s
       }
       records=""
-      dataset.each_array {|c|
-        indexes.each{|ik,iv|
-          c[ik]=carrier.conversions[iv][c[ik]]
+      dataset.each_row {|c|
+        indexes.each { |ik,iv|
+          c[ik] = carrier.conversions[iv][c[ik]]
        }
        records << "<record>#{values_definition(c, default_opt[:missing])}</record>\n"
      }
@@ -345,7 +185,7 @@ out
      # nickname = nickname
      def variable_definition(carrier,v,name,nickname=nil)
        nickname = (nickname.nil? ? "" : "nickname=\"#{nickname}\"" )
-        if v.type==:object or v.data.find {|d| d.is_a? String }
+        if v.type==:object or v.to_a.find {|d| d.is_a? String }
          carrier.categorials.push(name)
          carrier.conversions[name]={}
          factors=v.factors
@@ -353,17 +193,16 @@ out
        out << "<levels count=\"#{factors.size}\">\n"
        out << (1..factors.size).to_a.collect{|i|
          carrier.conversions[name][factors[i-1]]=i
-          "<level value=\"#{i}\">#{v.labeling(factors[i-1])}</level>"
+          "<level value=\"#{i}\">#{(v.labels[factors[i-1]] || factors[i-1])}</level>"
        }.join("\n")
        out << "</levels>\n</categoricalvariable>\n"
        out
-      elsif v.data.find {|d| d.is_a? Float}
+      elsif v.to_a.find {|d| d.is_a? Float}
        "<realvariable name=\"#{name}\" #{nickname} />"
      else
        "<integervariable name=\"#{name}\" #{nickname} />"
      end
    end
-
    end
  end
  end