statsample 1.5.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/.build.sh +15 -0
  3. data/.gitignore +1 -0
  4. data/.travis.yml +19 -7
  5. data/CONTRIBUTING.md +33 -0
  6. data/History.txt +5 -0
  7. data/README.md +41 -53
  8. data/benchmarks/correlation_matrix_15_variables.rb +6 -5
  9. data/benchmarks/correlation_matrix_5_variables.rb +6 -5
  10. data/benchmarks/correlation_matrix_methods/correlation_matrix.rb +23 -26
  11. data/examples/boxplot.rb +17 -5
  12. data/examples/correlation_matrix.rb +36 -7
  13. data/examples/dataset.rb +25 -5
  14. data/examples/dominance_analysis.rb +8 -7
  15. data/examples/dominance_analysis_bootstrap.rb +16 -11
  16. data/examples/histogram.rb +16 -2
  17. data/examples/icc.rb +5 -6
  18. data/examples/levene.rb +17 -3
  19. data/examples/multiple_regression.rb +6 -3
  20. data/examples/parallel_analysis.rb +11 -6
  21. data/examples/polychoric.rb +26 -13
  22. data/examples/principal_axis.rb +8 -4
  23. data/examples/reliability.rb +10 -10
  24. data/examples/scatterplot.rb +8 -0
  25. data/examples/t_test.rb +7 -0
  26. data/examples/u_test.rb +10 -2
  27. data/examples/vector.rb +9 -6
  28. data/examples/velicer_map_test.rb +12 -8
  29. data/lib/statsample.rb +13 -47
  30. data/lib/statsample/analysis/suite.rb +1 -1
  31. data/lib/statsample/anova/oneway.rb +6 -6
  32. data/lib/statsample/anova/twoway.rb +26 -24
  33. data/lib/statsample/bivariate.rb +78 -61
  34. data/lib/statsample/bivariate/pearson.rb +2 -2
  35. data/lib/statsample/codification.rb +45 -32
  36. data/lib/statsample/converter/csv.rb +15 -53
  37. data/lib/statsample/converter/spss.rb +6 -5
  38. data/lib/statsample/converters.rb +50 -211
  39. data/lib/statsample/crosstab.rb +26 -25
  40. data/lib/statsample/daru.rb +117 -0
  41. data/lib/statsample/dataset.rb +70 -942
  42. data/lib/statsample/dominanceanalysis.rb +16 -17
  43. data/lib/statsample/dominanceanalysis/bootstrap.rb +26 -28
  44. data/lib/statsample/factor/parallelanalysis.rb +17 -19
  45. data/lib/statsample/factor/pca.rb +21 -20
  46. data/lib/statsample/factor/principalaxis.rb +3 -3
  47. data/lib/statsample/graph/boxplot.rb +8 -16
  48. data/lib/statsample/graph/histogram.rb +4 -4
  49. data/lib/statsample/graph/scatterplot.rb +8 -7
  50. data/lib/statsample/histogram.rb +128 -119
  51. data/lib/statsample/matrix.rb +20 -16
  52. data/lib/statsample/multiset.rb +39 -38
  53. data/lib/statsample/regression.rb +3 -3
  54. data/lib/statsample/regression/multiple.rb +8 -10
  55. data/lib/statsample/regression/multiple/alglibengine.rb +96 -89
  56. data/lib/statsample/regression/multiple/baseengine.rb +32 -32
  57. data/lib/statsample/regression/multiple/gslengine.rb +33 -36
  58. data/lib/statsample/regression/multiple/matrixengine.rb +7 -9
  59. data/lib/statsample/regression/multiple/rubyengine.rb +39 -41
  60. data/lib/statsample/reliability.rb +23 -25
  61. data/lib/statsample/reliability/icc.rb +8 -7
  62. data/lib/statsample/reliability/multiscaleanalysis.rb +14 -12
  63. data/lib/statsample/reliability/scaleanalysis.rb +58 -60
  64. data/lib/statsample/reliability/skillscaleanalysis.rb +34 -29
  65. data/lib/statsample/resample.rb +1 -1
  66. data/lib/statsample/shorthand.rb +29 -25
  67. data/lib/statsample/test/kolmogorovsmirnov.rb +5 -3
  68. data/lib/statsample/test/levene.rb +28 -27
  69. data/lib/statsample/test/t.rb +7 -9
  70. data/lib/statsample/test/umannwhitney.rb +28 -28
  71. data/lib/statsample/test/wilcoxonsignedrank.rb +45 -43
  72. data/lib/statsample/vector.rb +70 -1013
  73. data/lib/statsample/version.rb +1 -1
  74. data/statsample.gemspec +12 -16
  75. data/test/helpers_tests.rb +1 -1
  76. data/test/test_analysis.rb +17 -17
  77. data/test/test_anova_contrast.rb +6 -6
  78. data/test/test_anovatwowaywithdataset.rb +8 -8
  79. data/test/test_anovawithvectors.rb +8 -8
  80. data/test/test_awesome_print_bug.rb +1 -1
  81. data/test/test_bartlettsphericity.rb +4 -4
  82. data/test/test_bivariate.rb +48 -43
  83. data/test/test_codification.rb +33 -33
  84. data/test/test_crosstab.rb +9 -9
  85. data/test/test_dataset.rb +28 -458
  86. data/test/test_factor.rb +46 -38
  87. data/test/test_factor_pa.rb +22 -13
  88. data/test/test_ggobi.rb +4 -4
  89. data/test/test_gsl.rb +4 -4
  90. data/test/test_histogram.rb +3 -3
  91. data/test/test_matrix.rb +13 -13
  92. data/test/test_multiset.rb +103 -91
  93. data/test/test_regression.rb +57 -52
  94. data/test/test_reliability.rb +55 -45
  95. data/test/test_reliability_icc.rb +8 -8
  96. data/test/test_reliability_skillscale.rb +26 -24
  97. data/test/test_resample.rb +1 -1
  98. data/test/test_statistics.rb +3 -13
  99. data/test/test_stest.rb +9 -9
  100. data/test/test_stratified.rb +3 -3
  101. data/test/test_test_t.rb +12 -12
  102. data/test/test_umannwhitney.rb +2 -2
  103. data/test/test_vector.rb +76 -613
  104. data/test/test_wilcoxonsignedrank.rb +4 -4
  105. metadata +57 -28
  106. data/lib/statsample/rserve_extension.rb +0 -20
  107. data/lib/statsample/vector/gsl.rb +0 -106
  108. data/test/fixtures/repeated_fields.csv +0 -7
  109. data/test/fixtures/scientific_notation.csv +0 -4
  110. data/test/fixtures/test_csv.csv +0 -7
  111. data/test/fixtures/test_xls.xls +0 -0
  112. data/test/test_csv.rb +0 -63
  113. data/test/test_rserve_extension.rb +0 -42
  114. data/test/test_xls.rb +0 -52
@@ -59,8 +59,6 @@ class MatrixEngine < BaseEngine
59
59
  @matrix_y = @matrix_cor.submatrix(@fields, [y_var])
60
60
  @matrix_y_cov = @matrix_cov.submatrix(@fields, [y_var])
61
61
 
62
-
63
-
64
62
  @y_sd=Math::sqrt(@matrix_cov.submatrix([y_var])[0,0])
65
63
 
66
64
  @x_sd=@n_predictors.times.inject({}) {|ac,i|
@@ -77,14 +75,14 @@ class MatrixEngine < BaseEngine
77
75
  @y_mean=0.0
78
76
  @name=_("Multiple reggresion of %s on %s") % [@fields.join(","), @y_var]
79
77
 
80
- opts_default={:digits=>3}
81
- opts=opts_default.merge opts
78
+ opts_default = {:digits=>3}
79
+ opts = opts_default.merge opts
82
80
  opts.each{|k,v|
83
81
  self.send("#{k}=",v) if self.respond_to? k
84
82
  }
85
83
  result_matrix=@matrix_x_cov.inverse * @matrix_y_cov
86
84
 
87
- if matrix._type==:covariance
85
+ if matrix._type == :covariance
88
86
  @coeffs=result_matrix.column(0).to_a
89
87
  @coeffs_stan=coeffs.collect {|k,v|
90
88
  coeffs[k]*@x_sd[k].quo(@y_sd)
@@ -116,12 +114,12 @@ class MatrixEngine < BaseEngine
116
114
  end
117
115
  # Value of constant
118
116
  def constant
119
- c=coeffs
120
- @y_mean - @fields.inject(0){|a,k| a + (c[k] * @x_mean[k])}
117
+ c = coeffs
118
+ @y_mean - @fields.inject(0) { |a,k| a + (c[k] * @x_mean[k])}
121
119
  end
122
120
  # Hash of b or raw coefficients
123
121
  def coeffs
124
- assign_names(@coeffs)
122
+ assign_names(@coeffs)
125
123
  end
126
124
  # Hash of beta or standarized coefficients
127
125
 
@@ -185,7 +183,7 @@ class MatrixEngine < BaseEngine
185
183
  sd[:constant]=0
186
184
  fields=[:constant]+@matrix_cov.fields-[@y_var]
187
185
  # Recreate X'X using the variance-covariance matrix
188
- xt_x=Matrix.rows(fields.collect {|i|
186
+ xt_x=::Matrix.rows(fields.collect {|i|
189
187
  fields.collect {|j|
190
188
  if i==:constant or j==:constant
191
189
  cov=0
@@ -8,76 +8,74 @@ module Multiple
8
8
  #
9
9
  # Example:
10
10
  #
11
- # @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:numeric)
12
- # @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:numeric)
13
- # @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:numeric)
14
- # @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:numeric)
15
- # ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
16
- # lr=Statsample::Regression::Multiple::RubyEngine.new(ds,'y')
11
+ # @a = Daru::Vector.new([1,3,2,4,3,5,4,6,5,7])
12
+ # @b = Daru::Vector.new([3,3,4,4,5,5,6,6,4,4])
13
+ # @c = Daru::Vector.new([11,22,30,40,50,65,78,79,99,100])
14
+ # @y = Daru::Vector.new([3,4,5,6,7,8,9,10,20,30])
15
+ # ds = Daru::DataFrame.new({:a => @a,:b => @b,:c => @c,:y => @y})
16
+ # lr=Statsample::Regression::Multiple::RubyEngine.new(ds,:y)
17
17
 
18
18
  class RubyEngine < MatrixEngine
19
19
  def initialize(ds,y_var, opts=Hash.new)
20
- matrix=ds.correlation_matrix
21
- fields_indep=ds.fields-[y_var]
22
- default={
23
- :y_mean=>ds[y_var].mean,
24
- :x_mean=>fields_indep.inject({}) {|ac,f| ac[f]=ds[f].mean; ac},
25
- :y_sd=>ds[y_var].sd,
26
- :x_sd=>fields_indep.inject({}) {|ac,f| ac[f]=ds[f].sd; ac},
27
- :cases=>Statsample::Bivariate.min_n_valid(ds)
20
+ matrix = Statsample::Bivariate.correlation_matrix ds
21
+ fields_indep=ds.vectors.to_a - [y_var]
22
+ default= {
23
+ :y_mean => ds[y_var].mean,
24
+ :x_mean => fields_indep.inject({}) {|ac,f| ac[f]=ds[f].mean; ac},
25
+ :y_sd => ds[y_var].sd,
26
+ :x_sd => fields_indep.inject({}) {|ac,f| ac[f]=ds[f].sd; ac},
27
+ :cases => Statsample::Bivariate.min_n_valid(ds)
28
28
  }
29
- opts=opts.merge(default)
29
+ opts = opts.merge(default)
30
30
  super(matrix, y_var, opts)
31
- @ds=ds
32
- @dy=ds[@y_var]
33
- @ds_valid=ds.dup_only_valid
34
- @total_cases=@ds.cases
35
- @valid_cases=@ds_valid.cases
36
- @ds_indep = ds.dup(ds.fields-[y_var])
31
+ @ds = ds
32
+ @dy = ds[@y_var]
33
+ @ds_valid = ds.dup_only_valid
34
+ @total_cases = @ds.nrows
35
+ @valid_cases = @ds_valid.nrows
36
+ @ds_indep = ds.dup(ds.vectors.to_a - [y_var])
37
37
  set_dep_columns
38
38
  end
39
39
 
40
40
  def set_dep_columns
41
- @dep_columns=[]
42
- @ds_indep.each_vector{|k,v|
43
- @dep_columns.push(v.data_with_nils)
44
- }
41
+ @dep_columns = []
42
+ @ds_indep.each_vector { |v| @dep_columns.push(v.to_a) }
45
43
  end
46
44
 
47
45
  def fix_with_mean
48
46
  i=0
49
- @ds_indep.each do |row|
47
+ @ds_indep.each(:row) do |row|
50
48
  empty=[]
51
49
  row.each do |k,v|
52
50
  empty.push(k) if v.nil?
53
51
  end
52
+
54
53
  if empty.size==1
55
54
  @ds_indep[empty[0]][i]=@ds[empty[0]].mean
56
55
  end
57
- i+=1
56
+ i += 1
58
57
  end
59
- @ds_indep.update_valid_data
58
+ @ds_indep.update
60
59
  set_dep_columns
61
60
  end
62
61
  def fix_with_regression
63
- i=0
64
- @ds_indep.each{|row|
65
- empty=[]
66
- row.each{|k,v|
67
- empty.push(k) if v.nil?
68
- }
62
+ i = 0
63
+ @ds_indep.each(:row) do |row|
64
+ empty = []
65
+ row.each { |k,v| empty.push(k) if v.nil? }
69
66
  if empty.size==1
70
- field=empty[0]
71
- lr=MultipleRegression.new(@ds_indep,field)
72
- fields=[]
73
- @ds_indep.fields.each{|f|
74
- fields.push(row[f]) unless f==field
67
+ field = empty[0]
68
+ lr = MultipleRegression.new(@ds_indep,field)
69
+ fields = []
70
+ @ds_indep.vectors.each { |f|
71
+ fields.push(row[f]) unless f == field
75
72
  }
73
+
76
74
  @ds_indep[field][i]=lr.process(fields)
77
75
  end
78
76
  i+=1
79
- }
80
- @ds_indep.update_valid_data
77
+ end
78
+ @ds_indep.update
81
79
  set_dep_columns
82
80
  end
83
81
  # Standard error for constant
@@ -4,30 +4,30 @@ module Statsample
4
4
  # Calculate Chonbach's alpha for a given dataset.
5
5
  # only uses tuples without missing data
6
6
  def cronbach_alpha(ods)
7
- ds=ods.dup_only_valid
8
- n_items=ds.fields.size
9
- return nil if n_items<=1
10
- s2_items=ds.vectors.inject(0) {|ac,v|
11
- ac+v[1].variance }
12
- total=ds.vector_sum
7
+ ds = ods.dup_only_valid
8
+ n_items = ds.ncols
9
+ return nil if n_items <= 1
10
+ s2_items = ds.to_hash.values.inject(0) { |ac,v|
11
+ ac + v.variance }
12
+ total = ds.vector_sum
13
13
 
14
- (n_items.quo(n_items-1)) * (1-(s2_items.quo(total.variance)))
14
+ (n_items.quo(n_items - 1)) * (1 - (s2_items.quo(total.variance)))
15
15
  end
16
16
  # Calculate Chonbach's alpha for a given dataset
17
17
  # using standarized values for every vector.
18
18
  # Only uses tuples without missing data
19
19
  # Return nil if one or more vectors has 0 variance
20
20
  def cronbach_alpha_standarized(ods)
21
+ ds = ods.dup_only_valid
22
+ return nil if ds.any? { |v| v.variance==0}
21
23
 
22
- ds=ods.dup_only_valid
23
-
24
- return nil if ds.vectors.any? {|k,v| v.variance==0}
25
-
26
- ds=ds.fields.inject({}){|a,f|
27
- a[f]=ods[f].standarized;
28
- a
29
- }.to_dataset
30
-
24
+ ds = Daru::DataFrame.new(
25
+ ds.vectors.to_a.inject({}) { |a,i|
26
+ a[i] = ods[i].standardize
27
+ a
28
+ }
29
+ )
30
+
31
31
  cronbach_alpha(ds)
32
32
  end
33
33
  # Predicted reliability of a test by replicating
@@ -54,10 +54,10 @@ module Statsample
54
54
  end
55
55
  # Get Cronbach's alpha from a covariance matrix
56
56
  def cronbach_alpha_from_covariance_matrix(cov)
57
- n=cov.row_size
57
+ n = cov.row_size
58
58
  raise "covariance matrix should have at least 2 variables" if n < 2
59
- s2=n.times.inject(0) {|ac,i| ac+cov[i,i]}
60
- (n.quo(n-1))*(1-(s2.quo(cov.total_sum)))
59
+ s2 = n.times.inject(0) { |ac,i| ac + cov[i,i] }
60
+ (n.quo(n - 1)) * (1 - (s2.quo(cov.total_sum)))
61
61
  end
62
62
  # Returns n necessary to obtain specific alpha
63
63
  # given variance and covariance mean of items
@@ -82,8 +82,6 @@ module Statsample
82
82
  end
83
83
  c_a=cronbach_alpha_from_n_s2_cov(n,s2,cov)
84
84
  dif=c_a - alpha
85
- #puts "#{n} , #{c_a}"
86
-
87
85
  end
88
86
  n
89
87
  end
@@ -110,20 +108,20 @@ module Statsample
110
108
  attr_reader :totals, :counts, :vector_total
111
109
  def initialize (ds, vector_total=nil)
112
110
  vector_total||=ds.vector_sum
113
- raise ArgumentError, "Total size != Dataset size" if vector_total.size!=ds.cases
111
+ raise ArgumentError, "Total size != Dataset size" if vector_total.size != ds.nrows
114
112
  @vector_total=vector_total
115
113
  @ds=ds
116
114
  @totals={}
117
- @counts=@ds.fields.inject({}) {|a,v| a[v]={};a}
115
+ @counts=@ds.vectors.to_a.inject({}) {|a,v| a[v]={};a}
118
116
  process
119
117
  end
120
118
  def process
121
119
  i=0
122
- @ds.each do |row|
120
+ @ds.each_row do |row|
123
121
  tot=@vector_total[i]
124
122
  @totals[tot]||=0
125
123
  @totals[tot]+=1
126
- @ds.fields.each do |f|
124
+ @ds.vectors.each do |f|
127
125
  item=row[f].to_s
128
126
  @counts[f][tot]||={}
129
127
  @counts[f][tot][item]||=0
@@ -6,12 +6,12 @@ module Statsample
6
6
  # several ratings) on a target and another measurement obtained on that target"
7
7
  # == Usage
8
8
  # require 'statsample'
9
- # size=1000
10
- # a = size.times.map {rand(10)}.to_numeric
9
+ # size = 1000
10
+ # a = Daru::Vector.new(size.times.map {rand(10)})
11
11
  # b = a.recode{|i|i+rand(4)-2}
12
- # c =a.recode{|i|i+rand(4)-2}
12
+ # c = a.recode{|i|i+rand(4)-2}
13
13
  # d = a.recode{|i|i+rand(4)-2}
14
- # ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
14
+ # ds = Daru::DataFrame.new({:a => a,:b => b,:c => c,:d => d})
15
15
  # # Use :type attribute to set type to summarize
16
16
  # icc=Statsample::Reliability::ICC.new(ds, :type=>:icc_1_k)
17
17
  # puts icc.summary
@@ -96,10 +96,11 @@ module Statsample
96
96
  attr_accessor :alpha
97
97
  attr_accessor :name
98
98
  def initialize(ds, opts=Hash.new)
99
+ ds.update
99
100
  @ds=ds.dup_only_valid
100
- @vectors=@ds.vectors.values
101
- @n=@ds.cases
102
- @k=@ds.fields.size
101
+ @vectors=@ds.map { |e| e }
102
+ @n=@ds.nrows
103
+ @k=@ds.ncols
103
104
  compute
104
105
  @g_rho=0
105
106
  @alpha=0.05
@@ -6,17 +6,17 @@ module Statsample
6
6
  # PCA and Factor Analysis.
7
7
  #
8
8
  # == Usage
9
- # @x1=[1,1,1,1,2,2,2,2,3,3,3,30].to_vector(:numeric)
10
- # @x2=[1,1,1,2,2,3,3,3,3,4,4,50].to_vector(:numeric)
11
- # @x3=[2,2,1,1,1,2,2,2,3,4,5,40].to_vector(:numeric)
12
- # @x4=[1,2,3,4,4,4,4,3,4,4,5,30].to_vector(:numeric)
13
- # ds={'x1'=>@x1,'x2'=>@x2,'x3'=>@x3,'x4'=>@x4}.to_dataset
9
+ # @x1 = Daru::Vector.new([1,1,1,1,2,2,2,2,3,3,3,30])
10
+ # @x2 = Daru::Vector.new([1,1,1,2,2,3,3,3,3,4,4,50])
11
+ # @x3 = Daru::Vector.new([2,2,1,1,1,2,2,2,3,4,5,40])
12
+ # @x4 = Daru::Vector.new([1,2,3,4,4,4,4,3,4,4,5,30])
13
+ # ds = Daru::DataFrame.new({:x1 => @x1,:x2 => @x2,:x3 => @x3,:x4 => @x4})
14
14
  # opts={:name=>"Scales", # Name of analysis
15
15
  # :summary_correlation_matrix=>true, # Add correlation matrix
16
16
  # :summary_pca } # Add PCA between scales
17
17
  # msa=Statsample::Reliability::MultiScaleAnalysis.new(opts) do |m|
18
- # m.scale :s1, ds.clone(%w{x1 x2})
19
- # m.scale :s2, ds.clone(%w{x3 x4}), {:name=>"Scale 2"}
18
+ # m.scale :s1, ds.clone([:x1, :x2])
19
+ # m.scale :s2, ds.clone([:x3, :x4]), {:name=>"Scale 2"}
20
20
  # end
21
21
  # # Retrieve summary
22
22
  # puts msa.summary
@@ -107,7 +107,7 @@ module Statsample
107
107
  # Retrieves a Principal Component Analysis (Factor::PCA)
108
108
  # using all scales, using <tt>opts</tt> a options.
109
109
  def pca(opts=nil)
110
- opts||=pca_options
110
+ opts ||= pca_options
111
111
  Statsample::Factor::PCA.new(correlation_matrix, opts)
112
112
  end
113
113
  # Retrieve Velicer's MAP
@@ -123,14 +123,15 @@ module Statsample
123
123
  Statsample::Factor::PrincipalAxis.new(correlation_matrix, opts)
124
124
  end
125
125
  def dataset_from_scales
126
- ds=Dataset.new(@scales_keys)
126
+ ds = Daru::DataFrame.new({}, order: @scales_keys.map(&:to_sym))
127
127
  @scales.each_pair do |code,scale|
128
- ds[code.to_s]=scale.ds.vector_sum
129
- ds[code.to_s].name=scale.name
128
+ ds[code.to_sym] = scale.ds.vector_sum
130
129
  end
131
- ds.update_valid_data
130
+
131
+ ds.update
132
132
  ds
133
133
  end
134
+
134
135
  def parallel_analysis(opts=nil)
135
136
  opts||=parallel_analysis_options
136
137
  Statsample::Factor::ParallelAnalysis.new(dataset_from_scales, opts)
@@ -140,6 +141,7 @@ module Statsample
140
141
  def correlation_matrix
141
142
  Statsample::Bivariate.correlation_matrix(dataset_from_scales)
142
143
  end
144
+
143
145
  def report_building(b) # :nodoc:
144
146
  b.section(:name=>name) do |s|
145
147
  s.section(:name=>_("Reliability analysis of scales")) do |s2|
@@ -3,12 +3,12 @@ module Statsample
3
3
  # Analysis of a Scale. Analoge of Scale Reliability analysis on SPSS.
4
4
  # Returns several statistics for complete scale and each item
5
5
  # == Usage
6
- # @x1=[1,1,1,1,2,2,2,2,3,3,3,30].to_vector(:numeric)
7
- # @x2=[1,1,1,2,2,3,3,3,3,4,4,50].to_vector(:numeric)
8
- # @x3=[2,2,1,1,1,2,2,2,3,4,5,40].to_vector(:numeric)
9
- # @x4=[1,2,3,4,4,4,4,3,4,4,5,30].to_vector(:numeric)
10
- # ds={'x1'=>@x1,'x2'=>@x2,'x3'=>@x3,'x4'=>@x4}.to_dataset
11
- # ia=Statsample::Reliability::ScaleAnalysis.new(ds)
6
+ # @x1 = Daru::Vector.new([1,1,1,1,2,2,2,2,3,3,3,30])
7
+ # @x2 = Daru::Vector.new([1,1,1,2,2,3,3,3,3,4,4,50])
8
+ # @x3 = Daru::Vector.new([2,2,1,1,1,2,2,2,3,4,5,40])
9
+ # @x4 = Daru::Vector.new([1,2,3,4,4,4,4,3,4,4,5,30])
10
+ # ds = Daru::DataFrame.new({:x1 => @x1,:x2 => @x2,:x3 => @x3,:x4 => @x4})
11
+ # ia = Statsample::Reliability::ScaleAnalysis.new(ds)
12
12
  # puts ia.summary
13
13
  class ScaleAnalysis
14
14
  include Summarizable
@@ -16,40 +16,40 @@ module Statsample
16
16
  attr_accessor :name
17
17
  attr_accessor :summary_histogram
18
18
  def initialize(ds, opts=Hash.new)
19
- @dumped=ds.fields.find_all {|f|
20
- ds[f].variance==0
19
+ @dumped=ds.vectors.to_a.find_all {|f|
20
+ ds[f].variance == 0
21
21
  }
22
22
 
23
- @ods=ds
24
- @ds=ds.dup_only_valid(ds.fields - @dumped)
25
- @ds.name=ds.name
23
+ @ods = ds
24
+ @ds = ds.dup_only_valid(ds.vectors.to_a - @dumped)
25
+ @ds.rename ds.name
26
26
 
27
- @k=@ds.fields.size
28
- @total=@ds.vector_sum
27
+ @k = @ds.ncols
28
+ @total = @ds.vector_sum
29
29
  @o_total=@dumped.size > 0 ? @ods.vector_sum : nil
30
30
 
31
- @vector_mean=@ds.vector_mean
32
- @item_mean=@vector_mean.mean
33
- @item_sd=@vector_mean.sd
31
+ @vector_mean = @ds.vector_mean
32
+ @item_mean = @vector_mean.mean
33
+ @item_sd = @vector_mean.sd
34
34
 
35
- @mean=@total.mean
36
- @median=@total.median
37
-
38
- @skew=@total.skew
39
- @kurtosis=@total.kurtosis
40
- @sd = @total.sd
41
- @variance=@total.variance
42
- @valid_n = @total.size
43
- opts_default={
44
- :name=>_("Reliability Analysis"),
45
- :summary_histogram=>true
35
+ @mean = @total.mean
36
+ @median = @total.median
37
+ @skew = @total.skew
38
+ @kurtosis = @total.kurtosis
39
+ @sd = @total.sd
40
+ @variance = @total.variance
41
+ @valid_n = @total.size
42
+
43
+ opts_default = {
44
+ :name => _("Reliability Analysis"),
45
+ :summary_histogram => true
46
46
  }
47
- @opts=opts_default.merge(opts)
48
- @opts.each{|k,v| self.send("#{k}=",v) if self.respond_to? k }
47
+ @opts = opts_default.merge(opts)
48
+ @opts.each{ |k,v| self.send("#{k}=",v) if self.respond_to? k }
49
49
 
50
50
  @cov_m=Statsample::Bivariate.covariance_matrix(@ds)
51
51
  # Mean for covariances and variances
52
- @variances=@k.times.map {|i| @cov_m[i,i]}.to_numeric
52
+ @variances = Daru::Vector.new(@k.times.map { |i| @cov_m[i,i] })
53
53
  @variances_mean=@variances.mean
54
54
  @covariances_mean=(@variance-@variances.sum).quo(@k**2-@k)
55
55
  #begin
@@ -66,7 +66,7 @@ module Statsample
66
66
  total={}
67
67
  @ds.each do |row|
68
68
  tot=@total[i]
69
- @ds.fields.each do |f|
69
+ @ds.vectors.each do |f|
70
70
  out[f]||= {}
71
71
  total[f]||={}
72
72
  out[f][tot]||= 0
@@ -87,43 +87,41 @@ module Statsample
87
87
  # Adjusted RPB(Point biserial-correlation) for each item
88
88
  #
89
89
  def item_total_correlation
90
- @itc||=@ds.fields.inject({}) do |a,v|
91
- vector=@ds[v].clone
92
- ds2=@ds.clone
93
- ds2.delete_vector(v)
94
- total=ds2.vector_sum
95
- a[v]=Statsample::Bivariate.pearson(vector,total)
90
+ vecs = @ds.vectors.to_a
91
+ @itc ||= vecs.inject({}) do |a,v|
92
+ total=@ds.vector_sum(vecs - [v])
93
+ a[v]=Statsample::Bivariate.pearson(@ds[v],total)
96
94
  a
97
95
  end
98
96
  end
99
97
  def mean_rpb
100
- item_total_correlation.values.to_numeric.mean
98
+ Daru::Vector.new(item_total_correlation.values).mean
101
99
  end
102
100
  def item_statistics
103
- @is||=@ds.fields.inject({}) do |a,v|
104
- a[v]={:mean=>@ds[v].mean, :sds=>Math::sqrt(@cov_m.variance(v))}
105
- a
106
- end
101
+ @is||=@ds.vectors.to_a.inject({}) do |a,v|
102
+ a[v]={:mean=>@ds[v].mean, :sds=>Math::sqrt(@cov_m.variance(v))}
103
+ a
104
+ end
107
105
  end
108
106
  # Returns a dataset with cases ordered by score
109
107
  # and variables ordered by difficulty
110
108
 
111
109
  def item_difficulty_analysis
112
110
  dif={}
113
- @ds.fields.each{|f| dif[f]=@ds[f].mean }
114
- dif_sort=dif.sort{|a,b| -(a[1]<=>b[1])}
111
+ @ds.vectors.each{|f| dif[f]=@ds[f].mean }
112
+ dif_sort = dif.sort { |a,b| -(a[1]<=>b[1]) }
115
113
  scores_sort={}
116
114
  scores=@ds.vector_mean
117
- scores.each_index{|i| scores_sort[i]=scores[i] }
115
+ scores.each_index{ |i| scores_sort[i]=scores[i] }
118
116
  scores_sort=scores_sort.sort{|a,b| a[1]<=>b[1]}
119
- ds_new=Statsample::Dataset.new(['case','score'] + dif_sort.collect{|a,b| a})
117
+ ds_new = Daru::DataFrame.new({}, order: ([:case,:score] + dif_sort.collect{|a,b| a.to_sym}))
120
118
  scores_sort.each do |i,score|
121
- row=[i, score]
122
- case_row=@ds.case_as_hash(i)
123
- dif_sort.each{|variable,dif_value| row.push(case_row[variable]) }
124
- ds_new.add_case_array(row)
119
+ row = [i, score]
120
+ case_row = @ds.row[i].to_hash
121
+ dif_sort.each{ |variable,dif_value| row.push(case_row[variable]) }
122
+ ds_new.add_row(row)
125
123
  end
126
- ds_new.update_valid_data
124
+ ds_new.update
127
125
  ds_new
128
126
  end
129
127
 
@@ -132,9 +130,10 @@ module Statsample
132
130
  end
133
131
 
134
132
  def stats_if_deleted_intern # :nodoc:
135
- return Hash.new if @ds.fields.size==1
136
- @ds.fields.inject({}) do |a,v|
137
- cov_2=@cov_m.submatrix(@ds.fields-[v])
133
+ return Hash.new if @ds.ncols == 1
134
+ vecs = @ds.vectors.to_a
135
+ vecs.inject({}) do |a,v|
136
+ cov_2=@cov_m.submatrix(vecs - [v])
138
137
  #ds2=@ds.clone
139
138
  #ds2.delete_vector(v)
140
139
  #total=ds2.vector_sum
@@ -151,11 +150,10 @@ module Statsample
151
150
  def report_building(builder) #:nodoc:
152
151
  builder.section(:name=>@name) do |s|
153
152
 
154
-
155
153
  if @dumped.size>0
156
154
  s.section(:name=>"Items with variance=0") do |s1|
157
155
  s.table(:name=>_("Summary for %s with all items") % @name) do |t|
158
- t.row [_("Items"), @ods.fields.size]
156
+ t.row [_("Items"), @ods.ncols]
159
157
  t.row [_("Sum mean"), "%0.4f" % @o_total.mean]
160
158
  t.row [_("S.d. mean"), "%0.4f" % @o_total.sd]
161
159
  end
@@ -170,7 +168,7 @@ module Statsample
170
168
 
171
169
 
172
170
  s.table(:name=>_("Summary for %s") % @name) do |t|
173
- t.row [_("Valid Items"), @ds.fields.size]
171
+ t.row [_("Valid Items"), @ds.ncols]
174
172
 
175
173
  t.row [_("Valid cases"), @valid_n]
176
174
  t.row [_("Sum mean"), "%0.4f" % @mean]
@@ -193,8 +191,8 @@ module Statsample
193
191
  end
194
192
 
195
193
  if (@alpha)
196
- s.text _("Items for obtain alpha(0.8) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.8, @ds.fields.size))
197
- s.text _("Items for obtain alpha(0.9) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.9, @ds.fields.size))
194
+ s.text _("Items for obtain alpha(0.8) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.8, @ds.ncols))
195
+ s.text _("Items for obtain alpha(0.9) : %d" % Statsample::Reliability::n_for_desired_reliability(@alpha, 0.9, @ds.ncols))
198
196
  end
199
197
 
200
198
 
@@ -203,7 +201,7 @@ module Statsample
203
201
  itc=item_total_correlation
204
202
 
205
203
  s.table(:name=>_("Items report for %s") % @name, :header=>["item","mean","sd", "mean if deleted", "var if deleted", "sd if deleted"," item-total correl.", "alpha if deleted"]) do |t|
206
- @ds.fields.each do |f|
204
+ @ds.vectors.each do |f|
207
205
  row=["#{@ds[f].name}(#{f})"]
208
206
  if is[f]
209
207
  row+=[sprintf("%0.5f",is[f][:mean]), sprintf("%0.5f", is[f][:sds])]