statsample 0.17.0 → 0.18.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,26 @@
1
class ::Vector
  # Converts the vector into a single-column ::Matrix holding the same
  # elements, in order.
  def to_matrix
    ::Matrix.columns([to_a])
  end
end
1
6
  class ::Matrix
2
7
  def to_matrix
3
8
  self
4
9
  end
10
# Builds a Statsample::Dataset from the matrix: one field per column and
# one case per row.
#
# Field names come from +fields_y+ when the matrix responds to it;
# otherwise they are generated as "VAR_1", "VAR_2", ... (1-based).
# Every field is typed as :scale and named after itself. The dataset takes
# the matrix name when the matrix responds to +name+.
#
# Returns the new dataset.
def to_dataset
  field_names = if respond_to?(:fields_y)
    fields_y
  else
    column_size.times.map { |i| _("VAR_%d") % (i + 1) }
  end
  ds = Statsample::Dataset.new(field_names)
  field_names.each do |field|
    ds[field].type = :scale
    ds[field].name = field
  end
  # Each matrix row becomes one case of the dataset.
  row_size.times { |i| ds.add_case_array(row(i).to_a) }
  ds.update_valid_data
  ds.name = name if respond_to?(:name)
  ds
end
5
24
  if defined? :eigenpairs
6
25
  alias_method :eigenpairs_ruby, :eigenpairs
7
26
  end
@@ -31,6 +50,13 @@ class ::Matrix
31
50
  end
32
51
 
33
52
  module GSL
53
class Vector
  class Col
    # Converts the column vector into a single-column ::Matrix, reading
    # the elements through +size+ and +[]+.
    def to_matrix
      ::Matrix.columns([Array.new(size) { |i| self[i] }])
    end
  end
end
34
60
  class Matrix
35
61
  def to_gsl
36
62
  self
@@ -45,13 +71,54 @@ module GSL
45
71
  end
46
72
 
47
73
  module Statsample
74
# Module to add names to X and Y fields
#
# Meant to be mixed into matrix-like objects that respond to +row_size+,
# +column_size+ and +square?+.
module NamedMatrix
  include Summarizable

  # Field names of a square matrix (same as +fields_x+).
  # Raises RuntimeError when the matrix is not square.
  def fields
    raise "Should be square" if !square?
    fields_x
  end

  # Sets the same names for rows and columns of a square matrix.
  # Raises RuntimeError when the matrix is not square.
  # Note: unlike +fields_x=+ / +fields_y=+, the size of +v+ is not checked.
  def fields=(v)
    raise "Matrix should be square" if !square?
    @fields_x=v
    @fields_y=v
  end

  # Sets the row names. Raises RuntimeError unless +v+ has one name per row.
  def fields_x=(v)
    raise "Size of fields != row_size" if v.size!=row_size
    @fields_x=v
  end

  # Sets the column names. Raises RuntimeError unless +v+ has one name
  # per column.
  def fields_y=(v)
    raise "Size of fields != column_size" if v.size!=column_size
    @fields_y=v
  end

  # Row names; defaults to "X0", "X1", ... (0-based), memoized on first use.
  def fields_x
    @fields_x||=row_size.times.collect {|i| _("X%d") % i}
  end

  # Column names; defaults to "Y0", "Y1", ... (0-based), memoized on
  # first use.
  def fields_y
    @fields_y||=column_size.times.collect {|i| _("Y%d") % i}
  end

  # Name of the matrix; generated with +get_new_name+ on first access
  # when none was assigned.
  def name
    @name||=get_new_name
  end

  # Assigns the name of the matrix.
  def name=(v)
    @name=v
  end

  # Returns a fresh default name, "Matrix N", where N is a counter that
  # is incremented on every call.
  def get_new_name
    @@named_matrix||=0
    @@named_matrix+=1
    _("Matrix %d") % @@named_matrix
  end
end
48
115
  # Module to add method for variance/covariance and correlation matrices
49
116
  # == Usage
50
117
  # matrix=Matrix[[1,2],[2,3]]
51
118
  # matrix.extend CovariateMatrix
52
119
  #
53
120
  module CovariateMatrix
54
- include Summarizable
121
+ include NamedMatrix
55
122
  @@covariatematrix=0
56
123
 
57
124
  # Get type of covariate matrix. Could be :covariance or :correlation
@@ -90,45 +157,19 @@ module Statsample
90
157
  self
91
158
  end
92
159
  end
93
- def fields
94
- raise "Should be square" if !square?
95
- fields_x
96
- end
97
- def fields=(v)
98
- raise "Matrix should be square" if !square?
99
- @fields_x=v
100
- @fields_y=v
101
- end
102
- def fields_x=(v)
103
- raise "Size of fields != row_size" if v.size!=row_size
104
- @fields_x=v
105
- end
106
- def fields_y=(v)
107
- raise "Size of fields != column_size" if v.size!=column_size
108
- @fields_y=v
109
- end
110
- def fields_x
111
- @fields_x||=row_size.times.collect {|i| _("X%d") % i}
112
- end
113
- def fields_y
114
- @fields_y||=column_size.times.collect {|i| _("Y%d") % i}
115
- end
116
160
 
117
- def name=(v)
118
- @name=v
119
- end
120
- def name
121
- @name||=get_new_name
122
- end
161
+
123
162
  # Get variance for field k
124
163
  #
125
164
  def variance(k)
126
165
  submatrix([k])[0,0]
127
166
  end
167
+
128
168
  def get_new_name
129
169
  @@covariatematrix+=1
130
170
  _("Covariate matrix %d") % @@covariatematrix
131
171
  end
172
+
132
173
  # Select a submatrix of factors. If you have a correlation matrix
133
174
  # with a, b and c, you could obtain a submatrix of correlations of
134
175
  # a and b, b and c or a and b
@@ -172,7 +213,9 @@ module Statsample
172
213
  @name||= (type==:correlation ? _("Correlation"):_("Covariance"))+_(" Matrix")
173
214
  generator.table(:name=>@name, :header=>[""]+fields_y) do |t|
174
215
  row_size.times {|i|
175
- t.row([fields_x[i]]+@rows[i].collect {|i1| sprintf("%0.3f",i1).gsub("0.",".")})
216
+ t.row([fields_x[i]]+@rows[i].collect {|i1|
217
+ i1.nil? ? "--" : sprintf("%0.3f",i1).gsub("0.",".")
218
+ })
176
219
  }
177
220
  end
178
221
  end
@@ -20,6 +20,36 @@ module Statsample
20
20
  }
21
21
  ms
22
22
  end
23
# Generates a new dataset as the union of the partial datasets.
#
# For every dataset yielded by +each+, the data of each field listed in
# @fields is appended, in iteration order, to the data already collected
# for that field. Type, name and labels of every resulting vector are
# taken from the first dataset visited.
#
# If a block is given, it is called with (key, copy_of_dataset) before the
# data is collected, so the block can modify the copy without touching the
# stored dataset.
#
# Returns the new dataset, with its fields ordered as @fields.
#
# NOTE(review): if +each+ yields no dataset, no data is collected and the
# vector-building step below is called on nil - confirm whether an empty
# union must be supported.
def union(&block)
  union_field = {}
  types = {}
  names = {}
  labels = {}
  each do |k, ds|
    if block
      ds = ds.dup
      block.call(k, ds)
    end
    @fields.each do |f|
      union_field[f] ||= Array.new
      union_field[f].concat(ds[f].data)
      types[f] ||= ds[f].type
      names[f] ||= ds[f].name
      labels[f] ||= ds[f].labels
    end
  end

  # Turn every accumulated array into a vector carrying the metadata of
  # the first dataset seen.
  @fields.each do |f|
    union_field[f] = union_field[f].to_vector(types[f])
    union_field[f].name = names[f]
    union_field[f].labels = labels[f]
  end
  ds_union = union_field.to_dataset
  ds_union.fields = @fields
  ds_union
end
23
53
  def datasets_names
24
54
  @datasets.keys.sort
25
55
  end
@@ -55,6 +85,12 @@ module Statsample
55
85
  def[](i)
56
86
  @datasets[i]
57
87
  end
88
# Calls the block with (key, dataset) for every stored dataset that has at
# least one case; datasets whose +cases+ is 0 are skipped.
#
# Without a block, returns an Enumerator over the same pairs (previously
# this raised NoMethodError on nil).
def each(&block)
  return enum_for(:each) unless block
  @datasets.each {|k,ds|
    next if ds.cases==0
    block.call(k,ds)
  }
end
58
94
  end
59
95
  class StratifiedSample
60
96
  class << self
@@ -32,23 +32,32 @@ module Statsample
32
32
  attr_accessor :summary_pca
33
33
  # Add Principal Axis to summary
34
34
  attr_accessor :summary_principal_axis
35
- # Add Parallel Analysis to summary
36
- attr_accessor :summary_parallel_analysis
37
35
  # Options for Factor::PCA object
38
36
  attr_accessor :pca_options
39
37
  # Options for Factor::PrincipalAxis
40
38
  attr_accessor :principal_axis_options
39
+
40
+ # Add Parallel Analysis to summary
41
+ attr_accessor :summary_parallel_analysis
41
42
  # Options for Parallel Analysis
42
43
  attr_accessor :parallel_analysis_options
44
+
45
+ # Add MAP to summary
46
+ attr_accessor :summary_map
47
+ # Options for MAP
48
+ attr_accessor :map_options
49
+
50
+
43
51
  # Generates a new MultiScaleAnalysis
44
52
  # Opts could be any accessor of the class
45
53
  # * :name,
46
54
  # * :summary_correlation_matrix
47
55
  # * :summary_pca
48
56
  # * :summary_principal_axis
57
+ # * :summary_map
49
58
  # * :pca_options
50
59
  # * :factor_analysis_options
51
- #
60
+ # * :map_options
52
61
  # If block given, all methods should be called
53
62
  # inside object environment.
54
63
  #
@@ -60,9 +69,11 @@ module Statsample
60
69
  :summary_pca=>false,
61
70
  :summary_principal_axis=>false,
62
71
  :summary_parallel_analysis=>false,
72
+ :summary_map=>false,
63
73
  :pca_options=>Hash.new,
64
74
  :principal_axis_options=>Hash.new,
65
- :parallel_analysis_options=>Hash.new
75
+ :parallel_analysis_options=>Hash.new,
76
+ :map_options=>Hash.new
66
77
  }
67
78
  @opts=opts_default.merge(opts)
68
79
  @opts.each{|k,v|
@@ -99,6 +110,10 @@ module Statsample
99
110
  opts||=pca_options
100
111
  Statsample::Factor::PCA.new(correlation_matrix, opts)
101
112
  end
113
# Retrieves a MAP analysis (Factor::MAP) built on the correlation matrix
# of the scales. +opts+ is passed to Factor::MAP.new; when nil,
# +map_options+ is used.
def map(opts=nil)
  opts ||= map_options
  Statsample::Factor::MAP.new(correlation_matrix, opts)
end
102
117
  # Retrieves a PrincipalAxis Analysis (Factor::PrincipalAxis)
103
118
  # using all scales, using <tt>opts</tt> as options.
104
119
  def principal_axis_analysis(opts=nil)
@@ -151,7 +166,11 @@ module Statsample
151
166
  s2.parse_element(parallel_analysis)
152
167
  end
153
168
  end
154
-
169
+ if summary_map
170
+ s.section(:name=>_("MAP for %s") % name) do |s2|
171
+ s2.parse_element(map)
172
+ end
173
+ end
155
174
  end
156
175
  end
157
176
  end
@@ -14,6 +14,7 @@ module Statsample
14
14
  include Summarizable
15
15
  attr_reader :ds,:mean, :sd,:valid_n, :alpha , :alpha_standarized, :variances_mean, :covariances_mean, :cov_m
16
16
  attr_accessor :name
17
+ attr_accessor :summary_histogram
17
18
  def initialize(ds, opts=Hash.new)
18
19
  @dumped=ds.fields.find_all {|f|
19
20
  ds[f].variance==0
@@ -40,9 +41,13 @@ module Statsample
40
41
  @sd = @total.sd
41
42
  @variance=@total.variance
42
43
  @valid_n = @total.size
43
- opts_default={:name=>_("Reliability Analisis")}
44
+ opts_default={
45
+ :name=>_("Reliability Analisis"),
46
+ :summary_histogram=>true
47
+ }
44
48
  @opts=opts_default.merge(opts)
45
- @name=@opts[:name]
49
+ @opts.each{|k,v| self.send("#{k}=",v) if self.respond_to? k }
50
+
46
51
  @cov_m=Statsample::Bivariate.covariance_matrix(@ds)
47
52
  # Mean for covariances and variances
48
53
  @variances=@k.times.map {|i| @cov_m[i,i]}.to_scale
@@ -154,14 +159,13 @@ module Statsample
154
159
  t.row [_("Items"), @ods.fields.size]
155
160
  t.row [_("Sum mean"), "%0.4f" % @o_total.mean]
156
161
  t.row [_("S.d. mean"), "%0.4f" % @o_total.sd]
157
-
158
162
  end
159
-
160
163
  s.table(:name=>_("Deleted items"), :header=>['item','mean']) do |t|
161
164
  @dumped.each do |f|
162
165
  t.row(["#{@ods[f].name}(#{f})", "%0.5f" % @ods[f].mean])
163
166
  end
164
167
  end
168
+ s.parse_element(Statsample::Graph::Histogram.new(@o_total)) if @summary_histogram
165
169
  end
166
170
  end
167
171
 
@@ -225,7 +229,7 @@ module Statsample
225
229
  t.row row
226
230
  end # end each
227
231
  end # table
228
-
232
+ s.parse_element(Statsample::Graph::Histogram.new(@total)) if @summary_histogram
229
233
  end # section
230
234
  end # def
231
235
  end # class
@@ -18,7 +18,7 @@ module Statsample
18
18
  attr_accessor :summary_show_problematic_items
19
19
  def initialize(ds,key,opts=Hash.new)
20
20
  opts_default={
21
- :name=>_("Skill Scale Reliability Analysis"),
21
+ :name=>_("Skill Scale Reliability Analysis (%s)") % ds.name,
22
22
  :summary_minimal_item_correlation=>0.10,
23
23
  :summary_show_problematic_items=>true
24
24
  }
@@ -28,9 +28,17 @@ module Statsample
28
28
  @opts.each{|k,v| self.send("#{k}=",v) if self.respond_to? k }
29
29
  @cds=nil
30
30
  end
31
# Dataset containing only the corrected vectors, i.e. the fields listed
# in @key, taken from +corrected_dataset+.
#
# Each vector is renamed "<original name>(corrected)" and receives the
# labels of the original vector in @ds; the dataset itself is named
# "Corrected dataset from <name of @ds>".
def corrected_dataset_minimal
  cds = corrected_dataset
  key_fields = @key.keys
  dsm = key_fields.inject({}) {|ac,v| ac[v]=cds[v];ac}.to_dataset
  key_fields.each do |k|
    dsm[k].name = _("%s(corrected)") % @ds[k].name
    dsm[k].labels = @ds[k].labels
  end

  dsm.name = _("Corrected dataset from %s") % @ds.name
  dsm
end
35
43
  def vector_sum
36
44
  corrected_dataset_minimal.vector_sum
@@ -74,11 +82,19 @@ module Statsample
74
82
  s.section(:name=>_("Problematic Items")) do |spi|
75
83
  count=0
76
84
  sa.item_total_correlation.each do |k,v|
77
- if v<summary_minimal_item_correlation
85
+ if v < summary_minimal_item_correlation
78
86
  count+=1
79
87
  spi.section(:name=>_("Item: %s") % @ds[k].name) do |spii|
80
88
  spii.text _("Correct answer: %s") % @key[k]
81
- spii.parse_element(@ds[k])
89
+ spii.text _("p: %0.3f") % corrected_dataset[k].mean
90
+ props=@ds[k].proportions.inject({}) {|ac,v| ac[v[0]] = v[1].to_f;ac}
91
+
92
+ spi.table(:name=>"Proportions",:header=>[_("Value"), _("%")]) do |table|
93
+ props.each do |k1,v|
94
+ table.row [ @ds[k].labeling(k1), "%0.3f" % v]
95
+ end
96
+ end
97
+
82
98
  end
83
99
  end
84
100
  end
@@ -91,12 +91,12 @@ module Statsample
91
91
  # Note: data, missing_values and labels are duplicated, so
92
92
  # changes on the original vector don't propagate to copies.
93
93
  def dup
94
- Vector.new(@data.dup,@type, :missing_values => @missing_values.dup, :labels => @labels.dup, :name=>@name.dup)
94
+ Vector.new(@data.dup,@type, :missing_values => @missing_values.dup, :labels => @labels.dup, :name=>@name)
95
95
  end
96
96
  # Returns an empty duplicate of the vector. Maintains the type,
97
97
  # missing values and labels.
98
98
  def dup_empty
99
- Vector.new([],@type, :missing_values => @missing_values.dup, :labels => @labels.dup, :name=>@name.dup)
99
+ Vector.new([],@type, :missing_values => @missing_values.dup, :labels => @labels.dup, :name=> @name)
100
100
  end
101
101
  # Raises an exception if type of vector is inferior to t type
102
102
  def check_type(t)
@@ -105,31 +105,47 @@ module Statsample
105
105
  private :check_type
106
106
 
107
107
  # Return a vector usign the standarized values for data
108
- # with sd with denominator n-1. With variance=0, returns nil
108
+ # with sd with denominator n-1. With variance=0 or mean nil,
109
+ # returns a vector of equal size full of nils
109
110
  #
110
111
 
111
112
  def vector_standarized(use_population=false)
112
113
  check_type :scale
114
+ return ([nil]*size).to_scale if mean.nil?
113
115
  m=mean
114
116
  sd=use_population ? sdp : sds
115
- return nil if sd==0.0
116
- @data_with_nils.collect{|x|
117
+ return ([nil]*size).to_scale if sd==0.0
118
+ vector=@data_with_nils.collect{|x|
117
119
  if !x.nil?
118
120
  (x.to_f - m).quo(sd)
119
121
  else
120
122
  nil
121
123
  end
122
124
  }.to_vector(:scale)
125
+ vector.name=_("%s(standarized)") % @name
126
+ vector
127
+ end
128
# Returns a centered vector: the mean is subtracted from every non-nil
# value (as a Float); nil values stay nil.
# The resulting vector is named "<name>(centered)".
def vector_centered
  check_type :scale
  m = mean
  vector = @data_with_nils.collect {|x|
    x.nil? ? nil : x.to_f - m
  }.to_scale
  vector.name = _("%s(centered)") % @name
  vector
end
124
138
 
125
139
  alias_method :standarized, :vector_standarized
140
+ alias_method :centered, :vector_centered
126
141
  # Return a vector with values replaced with the percentiles
127
142
  # of each values
128
143
  def vector_percentil
129
144
  check_type :ordinal
130
- c=size
131
- ranked.map {|i| (i.quo(c)*100).to_f }.to_vector(@type)
132
-
145
+ c=@valid_data.size
146
+ vector=ranked.map {|i| i.nil? ? nil : (i.quo(c)*100).to_f }.to_vector(@type)
147
+ vector.name=_("%s(percentil)") % @name
148
+ vector
133
149
  end
134
150
  def box_cox_transformation(lambda) # :nodoc:
135
151
  raise "Should be a scale" unless @type==:scale
@@ -181,17 +197,17 @@ module Statsample
181
197
  # If parameter is defined, this value and lower
182
198
  # will be 0 and higher, 1
183
199
  def dichotomize(low=nil)
184
- fs=factors
185
- low||=factors.min
186
- @data_with_nils.collect{|x|
187
- if x.nil?
188
- nil
189
- elsif x>low
190
- 1
191
- else
192
- 0
193
- end
194
- }.to_scale
200
+ fs=factors
201
+ low||=factors.min
202
+ @data_with_nils.collect{|x|
203
+ if x.nil?
204
+ nil
205
+ elsif x>low
206
+ 1
207
+ else
208
+ 0
209
+ end
210
+ }.to_scale
195
211
  end
196
212
  # Iterate on each item.
197
213
  # Equivalent to
@@ -268,7 +284,7 @@ module Statsample
268
284
  # Retrieves label for value x. Retrieves x if
269
285
  # no label defined.
270
286
  def labeling(x)
271
- @labels.has_key?(x) ? @labels[x].to_s : x.to_s
287
+ @labels.has_key?(x) ? @labels[x].to_s : x.to_s
272
288
  end
273
289
  # Returns a Vector with data with labels replaced by the label.
274
290
  def vector_labeled
@@ -304,22 +320,22 @@ module Statsample
304
320
  # if update_valid = false, you should use
305
321
  # set_valid_data after all changes
306
322
  def missing_values=(vals)
307
- @missing_values = vals
308
- set_valid_data
323
+ @missing_values = vals
324
+ set_valid_data
309
325
  end
310
326
  # Set data considered as "today" on data vectors
311
327
  def today_values=(vals)
312
- @today_values = vals
313
- set_valid_data
328
+ @today_values = vals
329
+ set_valid_data
314
330
  end
315
331
  # Set level of measurement.
316
332
  def type=(t)
317
- @type=t
318
- set_scale_data if(t==:scale)
319
- set_date_data if (t==:date)
333
+ @type=t
334
+ set_scale_data if(t==:scale)
335
+ set_date_data if (t==:date)
320
336
  end
321
337
  def to_a
322
- @data.dup
338
+ @data.dup
323
339
  end
324
340
  alias_method :to_ary, :to_a
325
341
 
@@ -532,12 +548,12 @@ module Statsample
532
548
  # Ugly name. Really, create a Vector for standard 'matrix' package.
533
549
  # <tt>dir</tt> could be :horizontal or :vertical
534
550
  def to_matrix(dir=:horizontal)
535
- case dir
536
- when :horizontal
537
- Matrix[@data]
538
- when :vertical
539
- Matrix.columns([@data])
540
- end
551
+ case dir
552
+ when :horizontal
553
+ Matrix[@data]
554
+ when :vertical
555
+ Matrix.columns([@data])
556
+ end
541
557
  end
542
558
  def inspect
543
559
  self.to_s
@@ -553,22 +569,22 @@ module Statsample
553
569
  end
554
570
  end
555
571
  if Statsample::STATSAMPLE__.respond_to?(:frequencies)
556
- # Returns a hash with the distribution of frecuencies for
557
- # the sample
558
- def frequencies
559
- Statsample::STATSAMPLE__.frequencies(@valid_data)
560
- end
572
+ # Returns a hash with the distribution of frecuencies for
573
+ # the sample
574
+ def frequencies
575
+ Statsample::STATSAMPLE__.frequencies(@valid_data)
576
+ end
561
577
  else
562
- def frequencies #:nodoc:
563
- _frequencies
564
- end
578
+ def frequencies #:nodoc:
579
+ _frequencies
580
+ end
565
581
  end
566
582
  def _frequencies #:nodoc:
567
- @valid_data.inject(Hash.new) {|a,x|
568
- a[x]||=0
569
- a[x]=a[x]+1
570
- a
571
- }
583
+ @valid_data.inject(Hash.new) {|a,x|
584
+ a[x]||=0
585
+ a[x]=a[x]+1
586
+ a
587
+ }
572
588
  end
573
589
 
574
590
  # Returns the most frequent item.
@@ -812,8 +828,7 @@ module Statsample
812
828
  end
813
829
  def mean # :nodoc:
814
830
  check_type :scale
815
-
816
- @gsl.mean
831
+ @gsl.nil? ? nil : @gsl.mean
817
832
  end
818
833
  def variance_sample(m=nil) # :nodoc:
819
834
  check_type :scale
@@ -822,6 +837,7 @@ module Statsample
822
837
  end
823
838
  def standard_deviation_sample(m=nil) # :nodoc:
824
839
  check_type :scale
840
+ return nil if @gsl.nil?
825
841
  m||=mean
826
842
  @gsl.sd(m)
827
843
  end