statsample 1.5.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +15 -0
  3. data/.gitignore +1 -0
  4. data/.travis.yml +19 -7
  5. data/CONTRIBUTING.md +33 -0
  6. data/History.txt +5 -0
  7. data/README.md +41 -53
  8. data/benchmarks/correlation_matrix_15_variables.rb +6 -5
  9. data/benchmarks/correlation_matrix_5_variables.rb +6 -5
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
  11. data/examples/boxplot.rb +17 -5
  12. data/examples/correlation_matrix.rb +36 -7
  13. data/examples/dataset.rb +25 -5
  14. data/examples/dominance_analysis.rb +8 -7
  15. data/examples/dominance_analysis_bootstrap.rb +16 -11
  16. data/examples/histogram.rb +16 -2
  17. data/examples/icc.rb +5 -6
  18. data/examples/levene.rb +17 -3
  19. data/examples/multiple_regression.rb +6 -3
  20. data/examples/parallel_analysis.rb +11 -6
  21. data/examples/polychoric.rb +26 -13
  22. data/examples/principal_axis.rb +8 -4
  23. data/examples/reliability.rb +10 -10
  24. data/examples/scatterplot.rb +8 -0
  25. data/examples/t_test.rb +7 -0
  26. data/examples/u_test.rb +10 -2
  27. data/examples/vector.rb +9 -6
  28. data/examples/velicer_map_test.rb +12 -8
  29. data/lib/statsample.rb +13 -47
  30. data/lib/statsample/analysis/suite.rb +1 -1
  31. data/lib/statsample/anova/oneway.rb +6 -6
  32. data/lib/statsample/anova/twoway.rb +26 -24
  33. data/lib/statsample/bivariate.rb +78 -61
  34. data/lib/statsample/bivariate/pearson.rb +2 -2
  35. data/lib/statsample/codification.rb +45 -32
  36. data/lib/statsample/converter/csv.rb +15 -53
  37. data/lib/statsample/converter/spss.rb +6 -5
  38. data/lib/statsample/converters.rb +50 -211
  39. data/lib/statsample/crosstab.rb +26 -25
  40. data/lib/statsample/daru.rb +117 -0
  41. data/lib/statsample/dataset.rb +70 -942
  42. data/lib/statsample/dominanceanalysis.rb +16 -17
  43. data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
  44. data/lib/statsample/factor/parallelanalysis.rb +17 -19
  45. data/lib/statsample/factor/pca.rb +21 -20
  46. data/lib/statsample/factor/principalaxis.rb +3 -3
  47. data/lib/statsample/graph/boxplot.rb +8 -16
  48. data/lib/statsample/graph/histogram.rb +4 -4
  49. data/lib/statsample/graph/scatterplot.rb +8 -7
  50. data/lib/statsample/histogram.rb +128 -119
  51. data/lib/statsample/matrix.rb +20 -16
  52. data/lib/statsample/multiset.rb +39 -38
  53. data/lib/statsample/regression.rb +3 -3
  54. data/lib/statsample/regression/multiple.rb +8 -10
  55. data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
  56. data/lib/statsample/regression/multiple/baseengine.rb +32 -32
  57. data/lib/statsample/regression/multiple/gslengine.rb +33 -36
  58. data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
  59. data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
  60. data/lib/statsample/reliability.rb +23 -25
  61. data/lib/statsample/reliability/icc.rb +8 -7
  62. data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
  63. data/lib/statsample/reliability/scaleanalysis.rb +58 -60
  64. data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
  65. data/lib/statsample/resample.rb +1 -1
  66. data/lib/statsample/shorthand.rb +29 -25
  67. data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
  68. data/lib/statsample/test/levene.rb +28 -27
  69. data/lib/statsample/test/t.rb +7 -9
  70. data/lib/statsample/test/umannwhitney.rb +28 -28
  71. data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
  72. data/lib/statsample/vector.rb +70 -1013
  73. data/lib/statsample/version.rb +1 -1
  74. data/statsample.gemspec +12 -16
  75. data/test/helpers_tests.rb +1 -1
  76. data/test/test_analysis.rb +17 -17
  77. data/test/test_anova_contrast.rb +6 -6
  78. data/test/test_anovatwowaywithdataset.rb +8 -8
  79. data/test/test_anovawithvectors.rb +8 -8
  80. data/test/test_awesome_print_bug.rb +1 -1
  81. data/test/test_bartlettsphericity.rb +4 -4
  82. data/test/test_bivariate.rb +48 -43
  83. data/test/test_codification.rb +33 -33
  84. data/test/test_crosstab.rb +9 -9
  85. data/test/test_dataset.rb +28 -458
  86. data/test/test_factor.rb +46 -38
  87. data/test/test_factor_pa.rb +22 -13
  88. data/test/test_ggobi.rb +4 -4
  89. data/test/test_gsl.rb +4 -4
  90. data/test/test_histogram.rb +3 -3
  91. data/test/test_matrix.rb +13 -13
  92. data/test/test_multiset.rb +103 -91
  93. data/test/test_regression.rb +57 -52
  94. data/test/test_reliability.rb +55 -45
  95. data/test/test_reliability_icc.rb +8 -8
  96. data/test/test_reliability_skillscale.rb +26 -24
  97. data/test/test_resample.rb +1 -1
  98. data/test/test_statistics.rb +3 -13
  99. data/test/test_stest.rb +9 -9
  100. data/test/test_stratified.rb +3 -3
  101. data/test/test_test_t.rb +12 -12
  102. data/test/test_umannwhitney.rb +2 -2
  103. data/test/test_vector.rb +76 -613
  104. data/test/test_wilcoxonsignedrank.rb +4 -4
  105. metadata +57 -28
  106. data/lib/statsample/rserve_extension.rb +0 -20
  107. data/lib/statsample/vector/gsl.rb +0 -106
  108. data/test/fixtures/repeated_fields.csv +0 -7
  109. data/test/fixtures/scientific_notation.csv +0 -4
  110. data/test/fixtures/test_csv.csv +0 -7
  111. data/test/fixtures/test_xls.xls +0 -0
  112. data/test/test_csv.rb +0 -63
  113. data/test/test_rserve_extension.rb +0 -42
  114. data/test/test_xls.rb +0 -52
@@ -59,8 +59,6 @@ class MatrixEngine < BaseEngine
59
59
  @matrix_y = @matrix_cor.submatrix(@fields, [y_var])
60
60
  @matrix_y_cov = @matrix_cov.submatrix(@fields, [y_var])
61
61
 
62
-
63
-
64
62
  @y_sd=Math::sqrt(@matrix_cov.submatrix([y_var])[0,0])
65
63
 
66
64
  @x_sd=@n_predictors.times.inject({}) {|ac,i|
@@ -77,14 +75,14 @@ class MatrixEngine < BaseEngine
77
75
  @y_mean=0.0
78
76
  @name=_("Multiple reggresion of %s on %s") % [@fields.join(","), @y_var]
79
77
 
80
- opts_default={:digits=>3}
81
- opts=opts_default.merge opts
78
+ opts_default = {:digits=>3}
79
+ opts = opts_default.merge opts
82
80
  opts.each{|k,v|
83
81
  self.send("#{k}=",v) if self.respond_to? k
84
82
  }
85
83
  result_matrix=@matrix_x_cov.inverse * @matrix_y_cov
86
84
 
87
- if matrix._type==:covariance
85
+ if matrix._type == :covariance
88
86
  @coeffs=result_matrix.column(0).to_a
89
87
  @coeffs_stan=coeffs.collect {|k,v|
90
88
  coeffs[k]*@x_sd[k].quo(@y_sd)
@@ -116,12 +114,12 @@ class MatrixEngine < BaseEngine
116
114
  end
117
115
  # Value of constant
118
116
  def constant
119
- c=coeffs
120
- @y_mean - @fields.inject(0){|a,k| a + (c[k] * @x_mean[k])}
117
+ c = coeffs
118
+ @y_mean - @fields.inject(0) { |a,k| a + (c[k] * @x_mean[k])}
121
119
  end
122
120
  # Hash of b or raw coefficients
123
121
  def coeffs
124
- assign_names(@coeffs)
122
+ assign_names(@coeffs)
125
123
  end
126
124
  # Hash of beta or standardized coefficients
127
125
 
@@ -185,7 +183,7 @@ class MatrixEngine < BaseEngine
185
183
  sd[:constant]=0
186
184
  fields=[:constant]+@matrix_cov.fields-[@y_var]
187
185
  # Recreate X'X using the variance-covariance matrix
188
- xt_x=Matrix.rows(fields.collect {|i|
186
+ xt_x=::Matrix.rows(fields.collect {|i|
189
187
  fields.collect {|j|
190
188
  if i==:constant or j==:constant
191
189
  cov=0
@@ -8,76 +8,74 @@ module Multiple
8
8
  #
9
9
  # Example:
10
10
  #
11
- # @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:numeric)
12
- # @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:numeric)
13
- # @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:numeric)
14
- # @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:numeric)
15
- # ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
16
- # lr=Statsample::Regression::Multiple::RubyEngine.new(ds,'y')
11
+ # @a = Daru::Vector.new([1,3,2,4,3,5,4,6,5,7])
12
+ # @b = Daru::Vector.new([3,3,4,4,5,5,6,6,4,4])
13
+ # @c = Daru::Vector.new([11,22,30,40,50,65,78,79,99,100])
14
+ # @y = Daru::Vector.new([3,4,5,6,7,8,9,10,20,30])
15
+ # ds = Daru::DataFrame.new({:a => @a,:b => @b,:c => @c,:y => @y})
16
+ # lr=Statsample::Regression::Multiple::RubyEngine.new(ds,:y)
17
17
 
18
18
  class RubyEngine < MatrixEngine
19
19
  def initialize(ds,y_var, opts=Hash.new)
20
- matrix=ds.correlation_matrix
21
- fields_indep=ds.fields-[y_var]
22
- default={
23
- :y_mean=>ds[y_var].mean,
24
- :x_mean=>fields_indep.inject({}) {|ac,f| ac[f]=ds[f].mean; ac},
25
- :y_sd=>ds[y_var].sd,
26
- :x_sd=>fields_indep.inject({}) {|ac,f| ac[f]=ds[f].sd; ac},
27
- :cases=>Statsample::Bivariate.min_n_valid(ds)
20
+ matrix = Statsample::Bivariate.correlation_matrix ds
21
+ fields_indep=ds.vectors.to_a - [y_var]
22
+ default= {
23
+ :y_mean => ds[y_var].mean,
24
+ :x_mean => fields_indep.inject({}) {|ac,f| ac[f]=ds[f].mean; ac},
25
+ :y_sd => ds[y_var].sd,
26
+ :x_sd => fields_indep.inject({}) {|ac,f| ac[f]=ds[f].sd; ac},
27
+ :cases => Statsample::Bivariate.min_n_valid(ds)
28
28
  }
29
- opts=opts.merge(default)
29
+ opts = opts.merge(default)
30
30
  super(matrix, y_var, opts)
31
- @ds=ds
32
- @dy=ds[@y_var]
33
- @ds_valid=ds.dup_only_valid
34
- @total_cases=@ds.cases
35
- @valid_cases=@ds_valid.cases
36
- @ds_indep = ds.dup(ds.fields-[y_var])
31
+ @ds = ds
32
+ @dy = ds[@y_var]
33
+ @ds_valid = ds.dup_only_valid
34
+ @total_cases = @ds.nrows
35
+ @valid_cases = @ds_valid.nrows
36
+ @ds_indep = ds.dup(ds.vectors.to_a - [y_var])
37
37
  set_dep_columns
38
38
  end
39
39
 
40
40
  def set_dep_columns
41
- @dep_columns=[]
42
- @ds_indep.each_vector{|k,v|
43
- @dep_columns.push(v.data_with_nils)
44
- }
41
+ @dep_columns = []
42
+ @ds_indep.each_vector { |v| @dep_columns.push(v.to_a) }
45
43
  end
46
44
 
47
45
  def fix_with_mean
48
46
  i=0
49
- @ds_indep.each do |row|
47
+ @ds_indep.each(:row) do |row|
50
48
  empty=[]
51
49
  row.each do |k,v|
52
50
  empty.push(k) if v.nil?
53
51
  end
52
+
54
53
  if empty.size==1
55
54
  @ds_indep[empty[0]][i]=@ds[empty[0]].mean
56
55
  end
57
- i+=1
56
+ i += 1
58
57
  end
59
- @ds_indep.update_valid_data
58
+ @ds_indep.update
60
59
  set_dep_columns
61
60
  end
62
61
  def fix_with_regression
63
- i=0
64
- @ds_indep.each{|row|
65
- empty=[]
66
- row.each{|k,v|
67
- empty.push(k) if v.nil?
68
- }
62
+ i = 0
63
+ @ds_indep.each(:row) do |row|
64
+ empty = []
65
+ row.each { |k,v| empty.push(k) if v.nil? }
69
66
  if empty.size==1
70
- field=empty[0]
71
- lr=MultipleRegression.new(@ds_indep,field)
72
- fields=[]
73
- @ds_indep.fields.each{|f|
74
- fields.push(row[f]) unless f==field
67
+ field = empty[0]
68
+ lr = MultipleRegression.new(@ds_indep,field)
69
+ fields = []
70
+ @ds_indep.vectors.each { |f|
71
+ fields.push(row[f]) unless f == field
75
72
  }
73
+
76
74
  @ds_indep[field][i]=lr.process(fields)
77
75
  end
78
76
  i+=1
79
- }
80
- @ds_indep.update_valid_data
77
+ end
78
+ @ds_indep.update
81
79
  set_dep_columns
82
80
  end
83
81
  # Standard error for constant
@@ -4,30 +4,30 @@ module Statsample
4
4
  # Calculate Cronbach's alpha for a given dataset.
5
5
  # only uses tuples without missing data
6
6
  def cronbach_alpha(ods)
7
- ds=ods.dup_only_valid
8
- n_items=ds.fields.size
9
- return nil if n_items<=1
10
- s2_items=ds.vectors.inject(0) {|ac,v|
11
- ac+v[1].variance }
12
- total=ds.vector_sum
7
+ ds = ods.dup_only_valid
8
+ n_items = ds.ncols
9
+ return nil if n_items <= 1
10
+ s2_items = ds.to_hash.values.inject(0) { |ac,v|
11
+ ac + v.variance }
12
+ total = ds.vector_sum
13
13
 
14
- (n_items.quo(n_items-1)) * (1-(s2_items.quo(total.variance)))
14
+ (n_items.quo(n_items - 1)) * (1 - (s2_items.quo(total.variance)))
15
15
  end
16
16
  # Calculate Cronbach's alpha for a given dataset
17
17
  # using standardized values for every vector.
18
18
  # Only uses tuples without missing data
19
19
  # Return nil if one or more vectors have 0 variance
20
20
  def cronbach_alpha_standarized(ods)
21
+ ds = ods.dup_only_valid
22
+ return nil if ds.any? { |v| v.variance==0}
21
23
 
22
- ds=ods.dup_only_valid
23
-
24
- return nil if ds.vectors.any? {|k,v| v.variance==0}
25
-
26
- ds=ds.fields.inject({}){|a,f|
27
- a[f]=ods[f].standarized;
28
- a
29
- }.to_dataset
30
-
24
+ ds = Daru::DataFrame.new(
25
+ ds.vectors.to_a.inject({}) { |a,i|
26
+ a[i] = ods[i].standardize
27
+ a
28
+ }
29
+ )
30
+
31
31
  cronbach_alpha(ds)
32
32
  end
33
33
  # Predicted reliability of a test by replicating
@@ -54,10 +54,10 @@ module Statsample
54
54
  end
55
55
  # Get Cronbach's alpha from a covariance matrix
56
56
  def cronbach_alpha_from_covariance_matrix(cov)
57
- n=cov.row_size
57
+ n = cov.row_size
58
58
  raise "covariance matrix should have at least 2 variables" if n < 2
59
- s2=n.times.inject(0) {|ac,i| ac+cov[i,i]}
60
- (n.quo(n-1))*(1-(s2.quo(cov.total_sum)))
59
+ s2 = n.times.inject(0) { |ac,i| ac + cov[i,i] }
60
+ (n.quo(n - 1)) * (1 - (s2.quo(cov.total_sum)))
61
61
  end
62
62
  # Returns n necessary to obtain specific alpha
63
63
  # given variance and covariance mean of items
@@ -82,8 +82,6 @@ module Statsample
82
82
  end
83
83
  c_a=cronbach_alpha_from_n_s2_cov(n,s2,cov)
84
84
  dif=c_a - alpha
85
- #puts "#{n} , #{c_a}"
86
-
87
85
  end
88
86
  n
89
87
  end
@@ -110,20 +108,20 @@ module Statsample
110
108
  attr_reader :totals, :counts, :vector_total
111
109
  def initialize (ds, vector_total=nil)
112
110
  vector_total||=ds.vector_sum
113
- raise ArgumentError, "Total size != Dataset size" if vector_total.size!=ds.cases
111
+ raise ArgumentError, "Total size != Dataset size" if vector_total.size != ds.nrows
114
112
  @vector_total=vector_total
115
113
  @ds=ds
116
114
  @totals={}
117
- @counts=@ds.fields.inject({}) {|a,v| a[v]={};a}
115
+ @counts=@ds.vectors.to_a.inject({}) {|a,v| a[v]={};a}
118
116
  process
119
117
  end
120
118
  def process
121
119
  i=0
122
- @ds.each do |row|
120
+ @ds.each_row do |row|
123
121
  tot=@vector_total[i]
124
122
  @totals[tot]||=0
125
123
  @totals[tot]+=1
126
- @ds.fields.each do |f|
124
+ @ds.vectors.each do |f|
127
125
  item=row[f].to_s
128
126
  @counts[f][tot]||={}
129
127
  @counts[f][tot][item]||=0
@@ -6,12 +6,12 @@ module Statsample
6
6
  # several ratings) on a target and another measurement obtained on that target"
7
7
  # == Usage
8
8
  # require 'statsample'
9
- # size=1000
10
- # a = size.times.map {rand(10)}.to_numeric
9
+ # size = 1000
10
+ # a = Daru::Vector.new(size.times.map {rand(10)})
11
11
  # b = a.recode{|i|i+rand(4)-2}
12
- # c =a.recode{|i|i+rand(4)-2}
12
+ # c = a.recode{|i|i+rand(4)-2}
13
13
  # d = a.recode{|i|i+rand(4)-2}
14
- # ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
14
+ # ds = Daru::DataFrame.new({:a => a,:b => b,:c => c,:d => d})
15
15
  # # Use :type attribute to set type to summarize
16
16
  # icc=Statsample::Reliability::ICC.new(ds, :type=>:icc_1_k)
17
17
  # puts icc.summary
@@ -96,10 +96,11 @@ module Statsample
96
96
  attr_accessor :alpha
97
97
  attr_accessor :name
98
98
  def initialize(ds, opts=Hash.new)
99
+ ds.update
99
100
  @ds=ds.dup_only_valid
100
- @vectors=@ds.vectors.values
101
- @n=@ds.cases
102
- @k=@ds.fields.size
101
+ @vectors=@ds.map { |e| e }
102
+ @n=@ds.nrows
103
+ @k=@ds.ncols
103
104
  compute
104
105
  @g_rho=0
105
106
  @alpha=0.05
@@ -6,17 +6,17 @@ module Statsample
6
6
  # PCA and Factor Analysis.
7
7
  #
8
8
  # == Usage
9
- # @x1=[1,1,1,1,2,2,2,2,3,3,3,30].to_vector(:numeric)
10
- # @x2=[1,1,1,2,2,3,3,3,3,4,4,50].to_vector(:numeric)
11
- # @x3=[2,2,1,1,1,2,2,2,3,4,5,40].to_vector(:numeric)
12
- # @x4=[1,2,3,4,4,4,4,3,4,4,5,30].to_vector(:numeric)
13
- # ds={'x1'=>@x1,'x2'=>@x2,'x3'=>@x3,'x4'=>@x4}.to_dataset
9
+ # @x1 = Daru::Vector.new([1,1,1,1,2,2,2,2,3,3,3,30])
10
+ # @x2 = Daru::Vector.new([1,1,1,2,2,3,3,3,3,4,4,50])
11
+ # @x3 = Daru::Vector.new([2,2,1,1,1,2,2,2,3,4,5,40])
12
+ # @x4 = Daru::Vector.new([1,2,3,4,4,4,4,3,4,4,5,30])
13
+ # ds = Daru::DataFrame.new({:x1 => @x1,:x2 => @x2,:x3 => @x3,:x4 => @x4})
14
14
  # opts={:name=>"Scales", # Name of analysis
15
15
  # :summary_correlation_matrix=>true, # Add correlation matrix
16
16
  # :summary_pca } # Add PCA between scales
17
17
  # msa=Statsample::Reliability::MultiScaleAnalysis.new(opts) do |m|
18
- # m.scale :s1, ds.clone(%w{x1 x2})
19
- # m.scale :s2, ds.clone(%w{x3 x4}), {:name=>"Scale 2"}
18
+ # m.scale :s1, ds.clone([:x1, :x2])
19
+ # m.scale :s2, ds.clone([:x3, :x4]), {:name=>"Scale 2"}
20
20
  # end
21
21
  # # Retrieve summary
22
22
  # puts msa.summary
@@ -107,7 +107,7 @@ module Statsample
107
107
  # Retrieves a Principal Component Analysis (Factor::PCA)
108
108
  # using all scales, using <tt>opts</tt> as options.
109
109
  def pca(opts=nil)
110
- opts||=pca_options
110
+ opts ||= pca_options
111
111
  Statsample::Factor::PCA.new(correlation_matrix, opts)
112
112
  end
113
113
  # Retrieve Velicer's MAP
@@ -123,14 +123,15 @@ module Statsample
123
123
  Statsample::Factor::PrincipalAxis.new(correlation_matrix, opts)
124
124
  end
125
125
  def dataset_from_scales
126
- ds=Dataset.new(@scales_keys)
126
+ ds = Daru::DataFrame.new({}, order: @scales_keys.map(&:to_sym))
127
127
  @scales.each_pair do |code,scale|
128
- ds[code.to_s]=scale.ds.vector_sum
129
- ds[code.to_s].name=scale.name
128
+ ds[code.to_sym] = scale.ds.vector_sum
130
129
  end
131
- ds.update_valid_data
130
+
131
+ ds.update
132
132
  ds
133
133
  end
134
+
134
135
  def parallel_analysis(opts=nil)
135
136
  opts||=parallel_analysis_options
136
137
  Statsample::Factor::ParallelAnalysis.new(dataset_from_scales, opts)
@@ -140,6 +141,7 @@ module Statsample
140
141
  def correlation_matrix
141
142
  Statsample::Bivariate.correlation_matrix(dataset_from_scales)
142
143
  end
144
+
143
145
  def report_building(b) # :nodoc:
144
146
  b.section(:name=>name) do |s|
145
147
  s.section(:name=>_("Reliability analysis of scales")) do |s2|
@@ -3,12 +3,12 @@ module Statsample
3
3
  # Analysis of a Scale. Analogue of Scale Reliability analysis in SPSS.
4
4
  # Returns several statistics for complete scale and each item
5
5
  # == Usage
6
- # @x1=[1,1,1,1,2,2,2,2,3,3,3,30].to_vector(:numeric)
7
- # @x2=[1,1,1,2,2,3,3,3,3,4,4,50].to_vector(:numeric)
8
- # @x3=[2,2,1,1,1,2,2,2,3,4,5,40].to_vector(:numeric)
9
- # @x4=[1,2,3,4,4,4,4,3,4,4,5,30].to_vector(:numeric)
10
- # ds={'x1'=>@x1,'x2'=>@x2,'x3'=>@x3,'x4'=>@x4}.to_dataset
11
- # ia=Statsample::Reliability::ScaleAnalysis.new(ds)
6
+ # @x1 = Daru::Vector.new([1,1,1,1,2,2,2,2,3,3,3,30])
7
+ # @x2 = Daru::Vector.new([1,1,1,2,2,3,3,3,3,4,4,50])
8
+ # @x3 = Daru::Vector.new([2,2,1,1,1,2,2,2,3,4,5,40])
9
+ # @x4 = Daru::Vector.new([1,2,3,4,4,4,4,3,4,4,5,30])
10
+ # ds = Daru::DataFrame.new({:x1 => @x1,:x2 => @x2,:x3 => @x3,:x4 => @x4})
11
+ # ia = Statsample::Reliability::ScaleAnalysis.new(ds)
12
12
  # puts ia.summary
13
13
  class ScaleAnalysis
14
14
  include Summarizable
@@ -16,40 +16,40 @@ module Statsample
16
16
  attr_accessor :name
17
17
  attr_accessor :summary_histogram
18
18
  def initialize(ds, opts=Hash.new)
19
- @dumped=ds.fields.find_all {|f|
20
- ds[f].variance==0
19
+ @dumped=ds.vectors.to_a.find_all {|f|
20
+ ds[f].variance == 0
21
21
  }
22
22
 
23
- @ods=ds
24
- @ds=ds.dup_only_valid(ds.fields - @dumped)
25
- @ds.name=ds.name
23
+ @ods = ds
24
+ @ds = ds.dup_only_valid(ds.vectors.to_a - @dumped)
25
+ @ds.rename ds.name
26
26
 
27
- @k=@ds.fields.size
28
- @total=@ds.vector_sum
27
+ @k = @ds.ncols
28
+ @total = @ds.vector_sum
29
29
  @o_total=@dumped.size > 0 ? @ods.vector_sum : nil
30
30
 
31
- @vector_mean=@ds.vector_mean
32
- @item_mean=@vector_mean.mean
33
- @item_sd=@vector_mean.sd
31
+ @vector_mean = @ds.vector_mean
32
+ @item_mean = @vector_mean.mean
33
+ @item_sd = @vector_mean.sd
34
34
 
35
- @mean=@total.mean
36
- @median=@total.median
37
-
38
- @skew=@total.skew
39
- @kurtosis=@total.kurtosis
40
- @sd = @total.sd
41
- @variance=@total.variance
42
- @valid_n = @total.size
43
- opts_default={
44
- :name=>_("Reliability Analysis"),
45
- :summary_histogram=>true
35
+ @mean = @total.mean
36
+ @median = @total.median
37
+ @skew = @total.skew
38
+ @kurtosis = @total.kurtosis
39
+ @sd = @total.sd
40
+ @variance = @total.variance
41
+ @valid_n = @total.size
42
+
43
+ opts_default = {
44
+ :name => _("Reliability Analysis"),
45
+ :summary_histogram => true
46
46
  }
47
- @opts=opts_default.merge(opts)
48
- @opts.each{|k,v| self.send("#{k}=",v) if self.respond_to? k }
47
+ @opts = opts_default.merge(opts)
48
+ @opts.each{ |k,v| self.send("#{k}=",v) if self.respond_to? k }
49
49
 
50
50
  @cov_m=Statsample::Bivariate.covariance_matrix(@ds)
51
51
  # Mean for covariances and variances
52
- @variances=@k.times.map {|i| @cov_m[i,i]}.to_numeric
52
+ @variances = Daru::Vector.new(@k.times.map { |i| @cov_m[i,i] })
53
53
  @variances_mean=@variances.mean
54
54
  @covariances_mean=(@variance-@variances.sum).quo(@k**2-@k)
55
55
  #begin
@@ -66,7 +66,7 @@ module Statsample
66
66
  total={}
67
67
  @ds.each do |row|
68
68
  tot=@total[i]
69
- @ds.fields.each do |f|
69
+ @ds.vectors.each do |f|
70
70
  out[f]||= {}
71
71
  total[f]||={}
72
72
  out[f][tot]||= 0
@@ -87,43 +87,41 @@ module Statsample
87
87
  # Adjusted RPB(Point biserial-correlation) for each item
88
88
  #
89
89
  def item_total_correlation
90
- @itc||=@ds.fields.inject({}) do |a,v|
91
- vector=@ds[v].clone
92
- ds2=@ds.clone
93
- ds2.delete_vector(v)
94
- total=ds2.vector_sum
95
- a[v]=Statsample::Bivariate.pearson(vector,total)
90
+ vecs = @ds.vectors.to_a
91
+ @itc ||= vecs.inject({}) do |a,v|
92
+ total=@ds.vector_sum(vecs - [v])
93
+ a[v]=Statsample::Bivariate.pearson(@ds[v],total)
96
94
  a
97
95
  end
98
96
  end
99
97
  def mean_rpb
100
- item_total_correlation.values.to_numeric.mean
98
+ Daru::Vector.new(item_total_correlation.values).mean
101
99
  end
102
100
  def item_statistics
103
- @is||=@ds.fields.inject({}) do |a,v|
104
- a[v]={:mean=>@ds[v].mean, :sds=>Math::sqrt(@cov_m.variance(v))}
105
- a
106
- end
101
+ @is||=@ds.vectors.to_a.inject({}) do |a,v|
102
+ a[v]={:mean=>@ds[v].mean, :sds=>Math::sqrt(@cov_m.variance(v))}
103
+ a
104
+ end
107
105
  end
108
106
  # Returns a dataset with cases ordered by score
109
107
  # and variables ordered by difficulty
110
108
 
111
109
  def item_difficulty_analysis
112
110
  dif={}
113
- @ds.fields.each{|f| dif[f]=@ds[f].mean }
114
- dif_sort=dif.sort{|a,b| -(a[1]<=>b[1])}
111
+ @ds.vectors.each{|f| dif[f]=@ds[f].mean }
112
+ dif_sort = dif.sort { |a,b| -(a[1]<=>b[1]) }
115
113
  scores_sort={}
116
114
  scores=@ds.vector_mean
117
- scores.each_index{|i| scores_sort[i]=scores[i] }
115
+ scores.each_index{ |i| scores_sort[i]=scores[i] }
118
116
  scores_sort=scores_sort.sort{|a,b| a[1]<=>b[1]}
119
- ds_new=Statsample::Dataset.new(['case','score'] + dif_sort.collect{|a,b| a})
117
+ ds_new = Daru::DataFrame.new({}, order: ([:case,:score] + dif_sort.collect{|a,b| a.to_sym}))
120
118
  scores_sort.each do |i,score|
121
- row=[i, score]
122
- case_row=@ds.case_as_hash(i)
123
- dif_sort.each{|variable,dif_value| row.push(case_row[variable]) }
124
- ds_new.add_case_array(row)
119
+ row = [i, score]
120
+ case_row = @ds.row[i].to_hash
121
+ dif_sort.each{ |variable,dif_value| row.push(case_row[variable]) }
122
+ ds_new.add_row(row)
125
123
  end
126
- ds_new.update_valid_data
124
+ ds_new.update
127
125
  ds_new
128
126
  end
129
127
 
@@ -132,9 +130,10 @@ module Statsample
132
130
  end
133
131
 
134
132
  def stats_if_deleted_intern # :nodoc:
135
- return Hash.new if @ds.fields.size==1
136
- @ds.fields.inject({}) do |a,v|
137
- cov_2=@cov_m.submatrix(@ds.fields-[v])
133
+ return Hash.new if @ds.ncols == 1
134
+ vecs = @ds.vectors.to_a
135
+ vecs.inject({}) do |a,v|
136
+ cov_2=@cov_m.submatrix(vecs - [v])
138
137
  #ds2=@ds.clone
139
138
  #ds2.delete_vector(v)
140
139
  #total=ds2.vector_sum
@@ -151,11 +150,10 @@ module Statsample
151
150
  def report_building(builder) #:nodoc:
152
151
  builder.section(:name=>@name) do |s|
153
152
 
154
-
155
153
  if @dumped.size>0
156
154
  s.section(:name=>"Items with variance=0") do |s1|
157
155
  s.table(:name=>_("Summary for %s with all items") % @name) do |t|
158
- t.row [_("Items"), @ods.fields.size]
156
+ t.row [_("Items"), @ods.ncols]
159
157
  t.row [_("Sum mean"), "%0.4f" % @o_total.mean]
160
158
  t.row [_("S.d. mean"), "%0.4f" % @o_total.sd]
161
159
  end
@@ -170,7 +168,7 @@ module Statsample
170
168
 
171
169
 
172
170
  s.table(:name=>_("Summary for %s") % @name) do |t|
173
- t.row [_("Valid Items"), @ds.fields.size]
171
+ t.row [_("Valid Items"), @ds.ncols]
174
172
 
175
173
  t.row [_("Valid cases"), @valid_n]
176
174
  t.row [_("Sum mean"), "%0.4f" % @mean]
@@ -193,8 +191,8 @@ module Statsample
193
191
  end
194
192
 
195
193
  if (@alpha)
196
- s.text _("Items for obtain alpha(0.8) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.8, @ds.fields.size))
197
- s.text _("Items for obtain alpha(0.9) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.9, @ds.fields.size))
194
+ s.text _("Items for obtain alpha(0.8) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.8, @ds.ncols))
195
+ s.text _("Items for obtain alpha(0.9) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.9, @ds.ncols))
198
196
  end
199
197
 
200
198
 
@@ -203,7 +201,7 @@ module Statsample
203
201
  itc=item_total_correlation
204
202
 
205
203
  s.table(:name=>_("Items report for %s") % @name, :header=>["item","mean","sd", "mean if deleted", "var if deleted", "sd if deleted"," item-total correl.", "alpha if deleted"]) do |t|
206
- @ds.fields.each do |f|
204
+ @ds.vectors.each do |f|
207
205
  row=["#{@ds[f].name}(#{f})"]
208
206
  if is[f]
209
207
  row+=[sprintf("%0.5f",is[f][:mean]), sprintf("%0.5f", is[f][:sds])]