statsample 1.5.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +15 -0
  3. data/.gitignore +1 -0
  4. data/.travis.yml +19 -7
  5. data/CONTRIBUTING.md +33 -0
  6. data/History.txt +5 -0
  7. data/README.md +41 -53
  8. data/benchmarks/correlation_matrix_15_variables.rb +6 -5
  9. data/benchmarks/correlation_matrix_5_variables.rb +6 -5
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
  11. data/examples/boxplot.rb +17 -5
  12. data/examples/correlation_matrix.rb +36 -7
  13. data/examples/dataset.rb +25 -5
  14. data/examples/dominance_analysis.rb +8 -7
  15. data/examples/dominance_analysis_bootstrap.rb +16 -11
  16. data/examples/histogram.rb +16 -2
  17. data/examples/icc.rb +5 -6
  18. data/examples/levene.rb +17 -3
  19. data/examples/multiple_regression.rb +6 -3
  20. data/examples/parallel_analysis.rb +11 -6
  21. data/examples/polychoric.rb +26 -13
  22. data/examples/principal_axis.rb +8 -4
  23. data/examples/reliability.rb +10 -10
  24. data/examples/scatterplot.rb +8 -0
  25. data/examples/t_test.rb +7 -0
  26. data/examples/u_test.rb +10 -2
  27. data/examples/vector.rb +9 -6
  28. data/examples/velicer_map_test.rb +12 -8
  29. data/lib/statsample.rb +13 -47
  30. data/lib/statsample/analysis/suite.rb +1 -1
  31. data/lib/statsample/anova/oneway.rb +6 -6
  32. data/lib/statsample/anova/twoway.rb +26 -24
  33. data/lib/statsample/bivariate.rb +78 -61
  34. data/lib/statsample/bivariate/pearson.rb +2 -2
  35. data/lib/statsample/codification.rb +45 -32
  36. data/lib/statsample/converter/csv.rb +15 -53
  37. data/lib/statsample/converter/spss.rb +6 -5
  38. data/lib/statsample/converters.rb +50 -211
  39. data/lib/statsample/crosstab.rb +26 -25
  40. data/lib/statsample/daru.rb +117 -0
  41. data/lib/statsample/dataset.rb +70 -942
  42. data/lib/statsample/dominanceanalysis.rb +16 -17
  43. data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
  44. data/lib/statsample/factor/parallelanalysis.rb +17 -19
  45. data/lib/statsample/factor/pca.rb +21 -20
  46. data/lib/statsample/factor/principalaxis.rb +3 -3
  47. data/lib/statsample/graph/boxplot.rb +8 -16
  48. data/lib/statsample/graph/histogram.rb +4 -4
  49. data/lib/statsample/graph/scatterplot.rb +8 -7
  50. data/lib/statsample/histogram.rb +128 -119
  51. data/lib/statsample/matrix.rb +20 -16
  52. data/lib/statsample/multiset.rb +39 -38
  53. data/lib/statsample/regression.rb +3 -3
  54. data/lib/statsample/regression/multiple.rb +8 -10
  55. data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
  56. data/lib/statsample/regression/multiple/baseengine.rb +32 -32
  57. data/lib/statsample/regression/multiple/gslengine.rb +33 -36
  58. data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
  59. data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
  60. data/lib/statsample/reliability.rb +23 -25
  61. data/lib/statsample/reliability/icc.rb +8 -7
  62. data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
  63. data/lib/statsample/reliability/scaleanalysis.rb +58 -60
  64. data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
  65. data/lib/statsample/resample.rb +1 -1
  66. data/lib/statsample/shorthand.rb +29 -25
  67. data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
  68. data/lib/statsample/test/levene.rb +28 -27
  69. data/lib/statsample/test/t.rb +7 -9
  70. data/lib/statsample/test/umannwhitney.rb +28 -28
  71. data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
  72. data/lib/statsample/vector.rb +70 -1013
  73. data/lib/statsample/version.rb +1 -1
  74. data/statsample.gemspec +12 -16
  75. data/test/helpers_tests.rb +1 -1
  76. data/test/test_analysis.rb +17 -17
  77. data/test/test_anova_contrast.rb +6 -6
  78. data/test/test_anovatwowaywithdataset.rb +8 -8
  79. data/test/test_anovawithvectors.rb +8 -8
  80. data/test/test_awesome_print_bug.rb +1 -1
  81. data/test/test_bartlettsphericity.rb +4 -4
  82. data/test/test_bivariate.rb +48 -43
  83. data/test/test_codification.rb +33 -33
  84. data/test/test_crosstab.rb +9 -9
  85. data/test/test_dataset.rb +28 -458
  86. data/test/test_factor.rb +46 -38
  87. data/test/test_factor_pa.rb +22 -13
  88. data/test/test_ggobi.rb +4 -4
  89. data/test/test_gsl.rb +4 -4
  90. data/test/test_histogram.rb +3 -3
  91. data/test/test_matrix.rb +13 -13
  92. data/test/test_multiset.rb +103 -91
  93. data/test/test_regression.rb +57 -52
  94. data/test/test_reliability.rb +55 -45
  95. data/test/test_reliability_icc.rb +8 -8
  96. data/test/test_reliability_skillscale.rb +26 -24
  97. data/test/test_resample.rb +1 -1
  98. data/test/test_statistics.rb +3 -13
  99. data/test/test_stest.rb +9 -9
  100. data/test/test_stratified.rb +3 -3
  101. data/test/test_test_t.rb +12 -12
  102. data/test/test_umannwhitney.rb +2 -2
  103. data/test/test_vector.rb +76 -613
  104. data/test/test_wilcoxonsignedrank.rb +4 -4
  105. metadata +57 -28
  106. data/lib/statsample/rserve_extension.rb +0 -20
  107. data/lib/statsample/vector/gsl.rb +0 -106
  108. data/test/fixtures/repeated_fields.csv +0 -7
  109. data/test/fixtures/scientific_notation.csv +0 -4
  110. data/test/fixtures/test_csv.csv +0 -7
  111. data/test/fixtures/test_xls.xls +0 -0
  112. data/test/test_csv.rb +0 -63
  113. data/test/test_rserve_extension.rb +0 -42
  114. data/test/test_xls.rb +0 -52
@@ -7,8 +7,8 @@ module Statsample
7
7
  # variables.
8
8
  #
9
9
  # == Usage
10
- # a = [1,2,3,4,5,6].to_numeric
11
- # b = [2,3,4,5,6,7].to_numeric
10
+ # a = Daru::Vector.new([1,2,3,4,5,6])
11
+ # b = Daru::Vector.new([2,3,4,5,6,7])
12
12
  # pearson = Statsample::Bivariate::Pearson.new(a,b)
13
13
  # puts pearson.r
14
14
  # puts pearson.t
@@ -34,24 +34,33 @@ module Statsample
34
34
  # will be hashes, with keys = values, for recodification
35
35
  def create_hash(dataset, vectors, sep=Statsample::SPLIT_TOKEN)
36
36
  raise ArgumentError,"Array should't be empty" if vectors.size==0
37
- pro_hash=vectors.inject({}){|h,v_name|
38
- raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
39
- v=dataset[v_name]
40
- split_data=v.splitted(sep).flatten.collect {|c| c.to_s}.find_all {|c| !c.nil?}
37
+ pro_hash = vectors.inject({}) do |h,v_name|
38
+ v_name = v_name.is_a?(Numeric) ? v_name : v_name.to_sym
39
+ raise Exception, "Vector #{v_name} doesn't exists on Dataset" if
40
+ !dataset.vectors.include?(v_name)
41
+ v = dataset[v_name]
42
+ split_data = v.splitted(sep)
43
+ .flatten
44
+ .collect { |c| c.to_s }
45
+ .find_all{ |c| !c.nil? }
41
46
 
42
- factors=split_data.uniq.compact.sort.inject({}) {|ac,val| ac[val]=val;ac }
43
- h[v_name]=factors
47
+ factors = split_data.uniq
48
+ .compact
49
+ .sort
50
+ .inject({}) { |ac,val| ac[val] = val; ac }
51
+ h[v_name] = factors
44
52
  h
45
- }
53
+ end
54
+
46
55
  pro_hash
47
56
  end
48
57
  # Create a yaml to create a dictionary, based on vectors
49
58
  # The keys will be vectors name on dataset and the values
50
59
  # will be hashes, with keys = values, for recodification
51
60
  #
52
- # v1=%w{a,b b,c d}.to_vector
53
- # ds={"v1"=>v1}.to_dataset
54
- # Statsample::Codification.create_yaml(ds,['v1'])
61
+ # v1 = Daru::Vector.new(%w{a,b b,c d})
62
+ # ds = Daru::DataFrame.new({:v1 => v1})
63
+ # Statsample::Codification.create_yaml(ds,[:v1])
55
64
  # => "--- \nv1: \n a: a\n b: b\n c: c\n d: d\n"
56
65
  def create_yaml(dataset, vectors, io=nil, sep=Statsample::SPLIT_TOKEN)
57
66
  pro_hash=create_hash(dataset, vectors, sep)
@@ -69,16 +78,17 @@ module Statsample
69
78
  if File.exist?(filename)
70
79
  raise "Exists a file named #{filename}. Delete ir before overwrite."
71
80
  end
72
- book = Spreadsheet::Workbook.new
81
+ book = Spreadsheet::Workbook.new
73
82
  sheet = book.create_worksheet
74
- sheet.row(0).concat(%w{field original recoded})
75
- i=1
83
+ sheet.row(0).concat(%w(field original recoded))
84
+ i = 1
76
85
  create_hash(dataset, vectors, sep).sort.each do |field, inner_hash|
77
86
  inner_hash.sort.each do |k,v|
78
- sheet.row(i).concat([field.dup,k.dup,v.dup])
79
- i+=1
87
+ sheet.row(i).concat([field.to_s,k.to_s,v.to_s])
88
+ i += 1
80
89
  end
81
90
  end
91
+
82
92
  book.write(filename)
83
93
  end
84
94
  # From a excel generates a dictionary hash
@@ -91,10 +101,11 @@ module Statsample
91
101
  sheet= book.worksheet 0
92
102
  row_i=0
93
103
  sheet.each do |row|
94
- row_i+=1
95
- next if row_i==1 or row[0].nil? or row[1].nil? or row[2].nil?
96
- h[row[0]]={} if h[row[0]].nil?
97
- h[row[0]][row[1]]=row[2]
104
+ row_i += 1
105
+ next if row_i == 1 or row[0].nil? or row[1].nil? or row[2].nil?
106
+ key = row[0].to_sym
107
+ h[key] ||= {}
108
+ h[key][row[1]] = row[2]
98
109
  end
99
110
  h
100
111
  end
@@ -110,12 +121,12 @@ module Statsample
110
121
  end
111
122
 
112
123
  def dictionary(h, sep=Statsample::SPLIT_TOKEN)
113
- h.inject({}) {|a,v| a[v[0]]=v[1].split(sep); a }
124
+ h.inject({}) { |a,v| a[v[0]]=v[1].split(sep); a }
114
125
  end
115
126
 
116
127
  def recode_vector(v,h,sep=Statsample::SPLIT_TOKEN)
117
- dict=dictionary(h,sep)
118
- new_data=v.splitted(sep)
128
+ dict = dictionary(h,sep)
129
+ new_data = v.splitted(sep)
119
130
  new_data.collect do |c|
120
131
  if c.nil?
121
132
  nil
@@ -134,20 +145,22 @@ module Statsample
134
145
  def _recode_dataset(dataset, h , sep=Statsample::SPLIT_TOKEN, split=false)
135
146
  v_names||=h.keys
136
147
  v_names.each do |v_name|
137
- raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.fields.include? v_name
138
- recoded=recode_vector(dataset[v_name], h[v_name],sep).collect { |c|
139
- if c.nil?
140
- nil
141
- else
142
- c.join(sep)
148
+ raise Exception, "Vector #{v_name} doesn't exists on Dataset" if !dataset.vectors.include? v_name
149
+ recoded = Daru::Vector.new(
150
+ recode_vector(dataset[v_name], h[v_name],sep).collect do |c|
151
+ if c.nil?
152
+ nil
153
+ else
154
+ c.join(sep)
155
+ end
143
156
  end
144
- }.to_vector
145
- if(split)
157
+ )
158
+ if split
146
159
  recoded.split_by_separator(sep).each {|k,v|
147
- dataset[v_name+"_"+k]=v
160
+ dataset[(v_name.to_s + "_" + k).to_sym] = v
148
161
  }
149
162
  else
150
- dataset[v_name+"_recoded"]=recoded
163
+ dataset[(v_name.to_s + "_recoded").to_sym] = recoded
151
164
  end
152
165
  end
153
166
  end
@@ -1,65 +1,27 @@
1
- require 'csv'
2
-
1
+ # This module will be removed in the next release.
2
+ # Please shift to using Daru::DataFrame.from_csv and #write_csv for CSV
3
+ # related operations.
3
4
  module Statsample
4
- class CSV < SpreadsheetBase
5
- # Default options for processing CSV files. Accept the same options as
6
- # Ruby's `CSV#new`.
7
- DEFAULT_OPTIONS = {
8
- converters: [:numeric]
9
- }
10
-
5
+ class CSV
11
6
  class << self
12
- # Return a Dataset created from a csv file.
7
+ # Return a DataFrame created from a csv file.
13
8
  #
14
- # USE:
15
- # ds = Statsample::CSV.read('test_csv.csv')
9
+ # == NOTE
10
+ #
11
+ # This method has been DEPRECATED in favour of Daru::DataFrame.from_csv.
12
+ # Please switch to using that.
16
13
  def read(filename, empty = [''], ignore_lines = 0, opts = {})
17
- first_row = true
18
- fields = []
19
- ds = nil
20
- line_number = 0
21
- options = DEFAULT_OPTIONS.merge(opts)
22
-
23
- csv = ::CSV.open(filename, 'rb', options)
24
-
25
- csv.each do |row|
26
- line_number += 1
27
-
28
- if (line_number <= ignore_lines)
29
- next
30
- end
31
-
32
- if first_row
33
- fields = extract_fields(row)
34
- ds = Statsample::Dataset.new(fields)
35
- first_row = false
36
- else
37
- rowa = process_row(row, empty)
38
- ds.add_case(rowa, false)
39
- end
40
- end
41
-
42
- convert_to_numeric_and_date(ds, fields)
43
- ds.update_valid_data
44
- ds
14
+ raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_csv instead."
45
15
  end
46
16
 
47
17
  # Save a Dataset on a csv file.
48
18
  #
49
- # USE:
50
- # Statsample::CSV.write(ds, 'test_csv.csv')
19
+ # == NOTE
20
+ #
21
+ # This method has been DEPRECATED in favor of Daru::DataFrame#write_csv.
22
+ # Please use that instead.
51
23
  def write(dataset, filename, convert_comma = false, opts = {})
52
- options = DEFAULT_OPTIONS.merge(opts)
53
-
54
- writer = ::CSV.open(filename, 'w', options)
55
- writer << dataset.fields
56
-
57
- dataset.each_array do |row|
58
- row.collect! { |v| v.to_s.gsub('.', ',') } if convert_comma
59
- writer << row
60
- end
61
-
62
- writer.close
24
+ raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_csv instead."
63
25
  end
64
26
  end
65
27
  end
@@ -4,26 +4,27 @@ module Statsample
4
4
  # Export a SPSS Matrix with tetrachoric correlations .
5
5
  #
6
6
  # Use:
7
- # ds=Statsample::Excel.read("my_data.xls")
7
+ # ds=Daru::DataFrame.from_excel("my_data.xls")
8
8
  # puts Statsample::SPSS.tetrachoric_correlation_matrix(ds)
9
9
  def tetrachoric_correlation_matrix(ds)
10
10
  dsv=ds.dup_only_valid
11
11
  # Delete all vectors that don't have variation
12
- dsv.fields.each{|f|
12
+ dsv.vectors.each { |f|
13
13
  if dsv[f].factors.size==1
14
14
  dsv.delete_vector(f)
15
15
  else
16
16
  dsv[f]=dsv[f].dichotomize
17
17
  end
18
18
  }
19
+
19
20
  tcm=Statsample::Bivariate.tetrachoric_correlation_matrix(dsv)
20
- n=dsv.fields.collect {|f|
21
+ n=dsv.vectors.to_a.collect {|f|
21
22
  sprintf("%d",dsv[f].size)
22
23
  }
23
- meanlist=dsv.fields.collect{|f|
24
+ meanlist=dsv.vectors.to_a.collect{|f|
24
25
  sprintf("%0.3f", dsv[f].mean)
25
26
  }
26
- stddevlist=dsv.fields.collect{|f|
27
+ stddevlist=dsv.vectors.to_a.collect{|f|
27
28
  sprintf("%0.3f", dsv[f].sd)
28
29
  }
29
30
  out=<<-HEREDOC
@@ -1,63 +1,36 @@
1
1
  require 'statsample/converter/spss'
2
2
  module Statsample
3
- # Create and dumps Datasets on a database
3
+ # Create and dumps Datasets on a database
4
+ #
5
+ # == NOTE
6
+ #
7
+ # Deprecated. Use Daru::DataFrame.from_sql and Daru::DataFrame#write_sql
4
8
  module Database
5
9
  class << self
6
10
  # Read a database query and returns a Dataset
7
11
  #
8
- # USE:
9
- #
10
- # dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
11
- # Statsample.read(dbh, "SELECT * FROM test")
12
- #
12
+ # == NOTE
13
+ #
14
+ # Deprecated. Use Daru::DataFrame.from_sql instead.
13
15
  def read(dbh,query)
14
- require 'dbi'
15
- sth=dbh.execute(query)
16
- vectors={}
17
- fields=[]
18
- sth.column_info.each {|c|
19
- vectors[c['name']]=Statsample::Vector.new([])
20
- vectors[c['name']].name=c['name']
21
- vectors[c['name']].type= (c['type_name']=='INTEGER' or c['type_name']=='DOUBLE') ? :numeric : :object
22
- fields.push(c['name'])
23
- }
24
- ds=Statsample::Dataset.new(vectors,fields)
25
- sth.fetch do |row|
26
- ds.add_case(row.to_a, false )
27
- end
28
- ds.update_valid_data
29
- ds
16
+ raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_sql instead."
30
17
  end
18
+
31
19
  # Insert each case of the Dataset on the selected table
32
20
  #
33
- # USE:
34
- #
35
- # ds={'id'=>[1,2,3].to_vector, 'name'=>["a","b","c"].to_vector}.to_dataset
36
- # dbh = DBI.connect("DBI:Mysql:database:localhost", "user", "password")
37
- # Statsample::Database.insert(ds,dbh,"test")
38
- #
21
+ # == NOTE
22
+ #
23
+ # Deprecated. Use Daru::DataFrame#write_sql instead
39
24
  def insert(ds, dbh, table)
40
- require 'dbi'
41
- query="INSERT INTO #{table} ("+ds.fields.join(",")+") VALUES ("+((["?"]*ds.fields.size).join(","))+")"
42
- sth=dbh.prepare(query)
43
- ds.each_array{|c| sth.execute(*c) }
44
- return true
25
+ raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_sql instead."
45
26
  end
46
27
  # Create a sql, based on a given Dataset
47
28
  #
48
- # USE:
49
- #
50
- # ds={'id'=>[1,2,3,4,5].to_vector,'name'=>%w{Alex Peter Susan Mary John}.to_vector}.to_dataset
51
- # Statsample::Database.create_sql(ds,'names')
52
- # ==>"CREATE TABLE names (id INTEGER,\n name VARCHAR (255)) CHARACTER SET=UTF8;"
53
- #
29
+ # == NOTE
30
+ #
31
+ # Deprecated. Use Daru::DataFrame#create_sql instead.
54
32
  def create_sql(ds,table,charset="UTF8")
55
- sql="CREATE TABLE #{table} ("
56
- fields=ds.fields.collect{|f|
57
- v=ds[f]
58
- f+" "+v.db_type
59
- }
60
- sql+fields.join(",\n ")+") CHARACTER SET=#{charset};"
33
+ raise NoMethodError, "Deprecated. Use Daru::DataFrame#create_sql instead."
61
34
  end
62
35
  end
63
36
  end
@@ -65,182 +38,49 @@ module Statsample
65
38
  class << self
66
39
  def write(dataset,filename)
67
40
  File.open(filename,"wb") do |fp|
68
- fp.puts dataset.fields.join("\t")
69
- dataset.each_array_with_nils do |row|
70
- row2=row.collect{|v| v.nil? ? "NA" : v.to_s.gsub(/\s+/,"_") }
41
+ fp.puts dataset.vectors.to_a.join("\t")
42
+ dataset.each_row do |row|
43
+ row2 = row.map { |v| v.nil? ? "NA" : v.to_s.gsub(/\s+/,"_") }
71
44
  fp.puts row2.join("\t")
72
45
  end
73
46
  end
74
47
  end
75
48
  end
76
49
  end
77
- class SpreadsheetBase
78
- class << self
79
- def extract_fields(row)
80
- i=0;
81
- fields=row.to_a.collect{|c|
82
- if c.nil?
83
- i+=1
84
- "var%05d" % i
85
- else
86
- c.to_s.downcase
87
- end
88
- }
89
- fields.recode_repeated
90
- end
91
50
 
92
- def process_row(row,empty)
93
- row.to_a.map do |c|
94
- if empty.include?(c)
95
- nil
96
- else
97
- if c.is_a? String and c.is_number?
98
- if c=~/^\d+$/
99
- c.to_i
100
- else
101
- c.gsub(",",".").to_f
102
- end
103
- else
104
- c
105
- end
106
- end
107
- end
108
- end
109
- def convert_to_numeric_and_date(ds,fields)
110
- fields.each do |f|
111
- if ds[f].can_be_numeric?
112
- ds[f].type=:numeric
113
- elsif ds[f].can_be_date?
114
- ds[f].type=:date
115
- end
116
- end
51
+ class PlainText
52
+ class << self
53
+ def read(filename, fields)
54
+ raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_plaintext instead."
117
55
  end
118
-
119
56
  end
120
57
  end
121
- class PlainText < SpreadsheetBase
122
- class << self
123
- def read(filename, fields)
124
- ds=Statsample::Dataset.new(fields)
125
- fp=File.open(filename,"r")
126
- fp.each_line do |line|
127
- row=process_row(line.strip.split(/\s+/),[""])
128
- next if row==["\x1A"]
129
- ds.add_case_array(row)
130
- end
131
- convert_to_numeric_and_date(ds,fields)
132
- ds.update_valid_data
133
- fields.each {|f|
134
- ds[f].name=f
135
- }
136
- ds
137
- end
138
- end
139
- end
140
- class Excel < SpreadsheetBase
58
+
59
+ # This class has been DEPRECATED. Use Daru::DataFrame::from_excel and
60
+ # Daru::DataFrame#write_excel for XLS file operations.
61
+ class Excel
141
62
  class << self
142
63
  # Write a Excel spreadsheet based on a dataset
143
64
  # * TODO: Format nicely date values
65
+ #
66
+ # == NOTE
67
+ #
68
+ # Deprecated. Use Daru::DataFrame#write_excel instead.
144
69
  def write(dataset,filename)
145
- require 'spreadsheet'
146
- book = Spreadsheet::Workbook.new
147
- sheet = book.create_worksheet
148
- format = Spreadsheet::Format.new :color => :blue,
149
- :weight => :bold
150
- sheet.row(0).concat(dataset.fields.map {|i| i.dup}) # Unfreeze strings
151
- sheet.row(0).default_format = format
152
- i=1
153
- dataset.each_array{|row|
154
- sheet.row(i).concat(row)
155
- i+=1
156
- }
157
- book.write(filename)
158
- end
159
- # This should be fixed.
160
- # If we have a Formula, should be resolver first
161
-
162
- def preprocess_row(row, dates)
163
- i=-1
164
- row.collect!{|c|
165
- i+=1
166
- if c.is_a? Spreadsheet::Formula
167
- if(c.value.is_a? Spreadsheet::Excel::Error)
168
- nil
169
- else
170
- c.value
171
- end
172
- elsif dates.include? i and !c.nil? and c.is_a? Numeric
173
- row.date(i)
174
- else
175
- c
176
- end
177
- }
70
+ raise NoMethodError, "Deprecated. Use Daru::DataFrame#write_excel instead."
178
71
  end
179
- private :process_row, :preprocess_row
180
72
 
181
73
  # Returns a dataset based on a xls file
182
- # USE:
183
- # ds = Statsample::Excel.read("test.xls")
184
- #
74
+ #
75
+ # == NOTE
76
+ #
77
+ # Deprecated. Use Daru::DataFrame.from_excel instead.
185
78
  def read(filename, opts=Hash.new)
186
- require 'spreadsheet'
187
- raise "options should be Hash" unless opts.is_a? Hash
188
- opts_default={
189
- :worksheet_id=>0,
190
- :ignore_lines=>0,
191
- :empty=>['']
192
- }
193
-
194
- opts=opts_default.merge opts
195
-
196
- worksheet_id=opts[:worksheet_id]
197
- ignore_lines=opts[:ignore_lines]
198
- empty=opts[:empty]
199
-
200
- first_row=true
201
- fields=[]
202
- ds=nil
203
- line_number=0
204
- book = Spreadsheet.open filename
205
- sheet= book.worksheet worksheet_id
206
- sheet.each do |row|
207
- begin
208
- dates=[]
209
- row.formats.each_index{|i|
210
- if !row.formats[i].nil? and row.formats[i].number_format=="DD/MM/YYYY"
211
- dates.push(i)
212
- end
213
- }
214
- line_number+=1
215
- next if(line_number<=ignore_lines)
216
-
217
- preprocess_row(row,dates)
218
- if first_row
219
- fields=extract_fields(row)
220
- ds=Statsample::Dataset.new(fields)
221
- first_row=false
222
- else
223
- rowa=process_row(row,empty)
224
- (fields.size - rowa.size).times {
225
- rowa << nil
226
- }
227
- ds.add_case(rowa,false)
228
- end
229
- rescue => e
230
- error="#{e.to_s}\nError on Line # #{line_number}:#{row.join(",")}"
231
- raise
232
- end
233
- end
234
- convert_to_numeric_and_date(ds, fields)
235
- ds.update_valid_data
236
- fields.each {|f|
237
- ds[f].name=f
238
- }
239
- ds.name=filename
240
- ds
79
+ raise NoMethodError, "Deprecated. Use Daru::DataFrame.from_excel instead."
241
80
  end
242
81
  end
243
82
  end
83
+
244
84
  module Mx
245
85
  class << self
246
86
  def write(dataset,filename,type=:covariance)
@@ -249,12 +89,12 @@ module Statsample
249
89
  fp.puts "! #{filename}"
250
90
  fp.puts "! Output generated by Statsample"
251
91
  fp.puts "Data Ninput=#{dataset.fields.size} Nobservations=#{dataset.cases}"
252
- fp.puts "Labels "+dataset.fields.join(" ")
92
+ fp.puts "Labels " + dataset.vectors.to_a.join(" ")
253
93
  case type
254
94
  when :raw
255
95
  fp.puts "Rectangular"
256
96
  dataset.each do |row|
257
- out=dataset.fields.collect do |f|
97
+ out=dataset.vectors.to_a.collect do |f|
258
98
  if dataset[f].is_valid? row[f]
259
99
  row[f]
260
100
  else
@@ -292,18 +132,18 @@ module Statsample
292
132
  carrier=OpenStruct.new
293
133
  carrier.categorials=[]
294
134
  carrier.conversions={}
295
- variables_def=dataset.fields.collect{|k|
135
+ variables_def=dataset.vectors.to_a.collect{|k|
296
136
  variable_definition(carrier,dataset[k],k)
297
137
  }.join("\n")
298
138
 
299
139
  indexes=carrier.categorials.inject({}) {|s,c|
300
- s[dataset.fields.index(c)]=c
140
+ s[dataset.vectors.to_a.index(c)]=c
301
141
  s
302
142
  }
303
143
  records=""
304
- dataset.each_array {|c|
305
- indexes.each{|ik,iv|
306
- c[ik]=carrier.conversions[iv][c[ik]]
144
+ dataset.each_row {|c|
145
+ indexes.each { |ik,iv|
146
+ c[ik] = carrier.conversions[iv][c[ik]]
307
147
  }
308
148
  records << "<record>#{values_definition(c, default_opt[:missing])}</record>\n"
309
149
  }
@@ -345,7 +185,7 @@ out
345
185
  # nickname = nickname
346
186
  def variable_definition(carrier,v,name,nickname=nil)
347
187
  nickname = (nickname.nil? ? "" : "nickname=\"#{nickname}\"" )
348
- if v.type==:object or v.data.find {|d| d.is_a? String }
188
+ if v.type==:object or v.to_a.find {|d| d.is_a? String }
349
189
  carrier.categorials.push(name)
350
190
  carrier.conversions[name]={}
351
191
  factors=v.factors
@@ -353,17 +193,16 @@ out
353
193
  out << "<levels count=\"#{factors.size}\">\n"
354
194
  out << (1..factors.size).to_a.collect{|i|
355
195
  carrier.conversions[name][factors[i-1]]=i
356
- "<level value=\"#{i}\">#{v.labeling(factors[i-1])}</level>"
196
+ "<level value=\"#{i}\">#{(v.labels[factors[i-1]] || factors[i-1])}</level>"
357
197
  }.join("\n")
358
198
  out << "</levels>\n</categoricalvariable>\n"
359
199
  out
360
- elsif v.data.find {|d| d.is_a? Float}
200
+ elsif v.to_a.find {|d| d.is_a? Float}
361
201
  "<realvariable name=\"#{name}\" #{nickname} />"
362
202
  else
363
203
  "<integervariable name=\"#{name}\" #{nickname} />"
364
204
  end
365
205
  end
366
-
367
206
  end
368
207
  end
369
208
  end