statsample 0.17.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,26 @@
1
# Extension of the Vector class from the standard 'matrix' package.
class ::Vector
  # Returns a ::Matrix with a single column holding the elements of
  # the vector, one row per element.
  def to_matrix
    ::Matrix.column_vector(to_a)
  end
end
1
6
  class ::Matrix
2
7
  def to_matrix
3
8
  self
4
9
  end
10
# Builds a Statsample::Dataset with one field per column and one case
# per row of the matrix.
#
# Field names come from fields_y when the matrix responds to it;
# otherwise they are generated from the pattern "VAR_%d", numbered
# from 1. Every field is typed as :scale and named after itself.
# The dataset takes the name of the matrix when the matrix has one.
def to_dataset
  field_names =
    if respond_to?(:fields_y)
      fields_y
    else
      (1..column_size).map { |number| _("VAR_%d") % number }
    end
  dataset = Statsample::Dataset.new(field_names)
  field_names.each do |field|
    column = dataset[field]
    column.type = :scale
    column.name = field
  end
  0.upto(row_size - 1) do |index|
    dataset.add_case_array(row(index).to_a)
  end
  dataset.update_valid_data
  dataset.name = name if respond_to?(:name)
  dataset
end
5
24
  if defined? :eigenpairs
6
25
  alias_method :eigenpairs_ruby, :eigenpairs
7
26
  end
@@ -31,6 +50,13 @@ class ::Matrix
31
50
  end
32
51
 
33
52
  module GSL
53
class Vector
  class Col
    # Returns a ::Matrix with a single column, filled by reading the
    # elements at index 0...size of the receiver.
    def to_matrix
      values = (0...size).map { |index| self[index] }
      ::Matrix.columns([values])
    end
  end
end
34
60
  class Matrix
35
61
  def to_gsl
36
62
  self
@@ -45,13 +71,54 @@ module GSL
45
71
  end
46
72
 
47
73
  module Statsample
74
# Module to add names to the X (row) and Y (column) fields of a matrix.
# The methods rely on the receiver providing row_size, column_size
# and square?.
module NamedMatrix
  include Summarizable

  # Field names of a square matrix (the same list as fields_x).
  # Raises RuntimeError when the matrix is not square.
  def fields
    raise "Should be square" if !square?
    fields_x
  end
  # Sets the same names on both axes of a square matrix.
  # The assignment goes through fields_x= and fields_y=, so a list of
  # the wrong size is rejected here too instead of being stored silently.
  # Raises RuntimeError when the matrix is not square.
  def fields=(v)
    raise "Matrix should be square" if !square?
    self.fields_x=v
    self.fields_y=v
  end
  # Sets the row names. Raises RuntimeError unless v has row_size elements.
  def fields_x=(v)
    raise "Size of fields != row_size" if v.size!=row_size
    @fields_x=v
  end
  # Sets the column names. Raises RuntimeError unless v has column_size
  # elements.
  def fields_y=(v)
    raise "Size of fields != column_size" if v.size!=column_size
    @fields_y=v
  end
  # Row names. When none were assigned they are built on first use from
  # the pattern "X%d" with a zero-based index.
  def fields_x
    @fields_x||=row_size.times.collect {|i| _("X%d") % i}
  end
  # Column names. When none were assigned they are built on first use
  # from the pattern "Y%d" with a zero-based index.
  def fields_y
    @fields_y||=column_size.times.collect {|i| _("Y%d") % i}
  end

  # Name of the matrix; obtained from get_new_name on first use when
  # no name was assigned.
  def name
    @name||=get_new_name
  end
  # Assigns the name of the matrix.
  def name=(v)
    @name=v
  end
  # Builds a default name from the class-level counter @@named_matrix,
  # which is incremented on every call.
  def get_new_name
    @@named_matrix||=0
    @@named_matrix+=1
    _("Matrix %d") % @@named_matrix
  end

end
48
115
  # Module to add method for variance/covariance and correlation matrices
49
116
  # == Usage
50
117
  # matrix=Matrix[[1,2],[2,3]]
51
118
  # matrix.extend CovariateMatrix
52
119
  #
53
120
  module CovariateMatrix
54
- include Summarizable
121
+ include NamedMatrix
55
122
  @@covariatematrix=0
56
123
 
57
124
  # Get type of covariate matrix. Could be :covariance or :correlation
@@ -90,45 +157,19 @@ module Statsample
90
157
  self
91
158
  end
92
159
  end
93
- def fields
94
- raise "Should be square" if !square?
95
- fields_x
96
- end
97
- def fields=(v)
98
- raise "Matrix should be square" if !square?
99
- @fields_x=v
100
- @fields_y=v
101
- end
102
- def fields_x=(v)
103
- raise "Size of fields != row_size" if v.size!=row_size
104
- @fields_x=v
105
- end
106
- def fields_y=(v)
107
- raise "Size of fields != column_size" if v.size!=column_size
108
- @fields_y=v
109
- end
110
- def fields_x
111
- @fields_x||=row_size.times.collect {|i| _("X%d") % i}
112
- end
113
- def fields_y
114
- @fields_y||=column_size.times.collect {|i| _("Y%d") % i}
115
- end
116
160
 
117
- def name=(v)
118
- @name=v
119
- end
120
- def name
121
- @name||=get_new_name
122
- end
161
+
123
162
  # Get variance for field k
124
163
  #
125
164
  def variance(k)
126
165
  submatrix([k])[0,0]
127
166
  end
167
+
128
168
  def get_new_name
129
169
  @@covariatematrix+=1
130
170
  _("Covariate matrix %d") % @@covariatematrix
131
171
  end
172
+
132
173
  # Select a submatrix of factors. If you have a correlation matrix
133
174
  # with a, b and c, you could obtain a submatrix of correlations of
134
175
  # a and b, b and c or a and b
@@ -172,7 +213,9 @@ module Statsample
172
213
  @name||= (type==:correlation ? _("Correlation"):_("Covariance"))+_(" Matrix")
173
214
  generator.table(:name=>@name, :header=>[""]+fields_y) do |t|
174
215
  row_size.times {|i|
175
- t.row([fields_x[i]]+@rows[i].collect {|i1| sprintf("%0.3f",i1).gsub("0.",".")})
216
+ t.row([fields_x[i]]+@rows[i].collect {|i1|
217
+ i1.nil? ? "--" : sprintf("%0.3f",i1).gsub("0.",".")
218
+ })
176
219
  }
177
220
  end
178
221
  end
@@ -20,6 +20,36 @@ module Statsample
20
20
  }
21
21
  ms
22
22
  end
23
# Generate a new dataset as the union of the partial datasets.
# If a block is given, it receives the key and a copy (dup) of each
# dataset before the data of that copy is appended to the union.
# Type, name and labels of every field are taken from the first
# dataset visited that provides a value for them.
def union(&block)
  data_by_field = {}
  type_by_field = {}
  name_by_field = {}
  labels_by_field = {}
  each do |key, dataset|
    if block
      dataset = dataset.dup
      yield key, dataset
    end
    @fields.each do |field|
      (data_by_field[field] ||= Array.new).concat(dataset[field].data)
      type_by_field[field] ||= dataset[field].type
      name_by_field[field] ||= dataset[field].name
      labels_by_field[field] ||= dataset[field].labels
    end
  end

  # Turn every accumulated array into a vector carrying the metadata
  @fields.each do |field|
    vector = data_by_field[field].to_vector(type_by_field[field])
    vector.name = name_by_field[field]
    vector.labels = labels_by_field[field]
    data_by_field[field] = vector
  end
  merged = data_by_field.to_dataset
  merged.fields = @fields
  merged
end
23
53
  def datasets_names
24
54
  @datasets.keys.sort
25
55
  end
@@ -55,6 +85,12 @@ module Statsample
55
85
  def[](i)
56
86
  @datasets[i]
57
87
  end
88
# Calls the block with key and dataset for every stored dataset,
# skipping the datasets whose cases equals 0.
def each(&block)
  @datasets.each do |key, dataset|
    block.call(key, dataset) unless dataset.cases == 0
  end
end
58
94
  end
59
95
  class StratifiedSample
60
96
  class << self
@@ -32,23 +32,32 @@ module Statsample
32
32
  attr_accessor :summary_pca
33
33
  # Add Principal Axis to summary
34
34
  attr_accessor :summary_principal_axis
35
- # Add Parallel Analysis to summary
36
- attr_accessor :summary_parallel_analysis
37
35
  # Options for Factor::PCA object
38
36
  attr_accessor :pca_options
39
37
  # Options for Factor::PrincipalAxis
40
38
  attr_accessor :principal_axis_options
39
+
40
+ # Add Parallel Analysis to summary
41
+ attr_accessor :summary_parallel_analysis
41
42
  # Options for Parallel Analysis
42
43
  attr_accessor :parallel_analysis_options
44
+
45
+ # Add MAP to summary
46
+ attr_accessor :summary_map
47
+ # Options for MAP
48
+ attr_accessor :map_options
49
+
50
+
43
51
  # Generates a new MultiScaleAnalysis
44
52
  # Opts could be any accessor of the class
45
53
  # * :name,
46
54
  # * :summary_correlation_matrix
47
55
  # * :summary_pca
48
56
  # * :summary_principal_axis
57
+ # * :summary_map
49
58
  # * :pca_options
50
59
  # * :factor_analysis_options
51
- #
60
+ # * :map_options
52
61
  # If block given, all methods should be called
53
62
  # inside object environment.
54
63
  #
@@ -60,9 +69,11 @@ module Statsample
60
69
  :summary_pca=>false,
61
70
  :summary_principal_axis=>false,
62
71
  :summary_parallel_analysis=>false,
72
+ :summary_map=>false,
63
73
  :pca_options=>Hash.new,
64
74
  :principal_axis_options=>Hash.new,
65
- :parallel_analysis_options=>Hash.new
75
+ :parallel_analysis_options=>Hash.new,
76
+ :map_options=>Hash.new
66
77
  }
67
78
  @opts=opts_default.merge(opts)
68
79
  @opts.each{|k,v|
@@ -99,6 +110,10 @@ module Statsample
99
110
  opts||=pca_options
100
111
  Statsample::Factor::PCA.new(correlation_matrix, opts)
101
112
  end
113
# Retrieves a MAP analysis (Factor::MAP) built on the correlation
# matrix, using <tt>opts</tt> as options. When opts is not given,
# map_options is used.
def map(opts=nil)
  options = opts || map_options
  Statsample::Factor::MAP.new(correlation_matrix, options)
end
102
117
  # Retrieves a PrincipalAxis Analysis (Factor::PrincipalAxis)
103
118
  # using all scales, using <tt>opts</tt> a options.
104
119
  def principal_axis_analysis(opts=nil)
@@ -151,7 +166,11 @@ module Statsample
151
166
  s2.parse_element(parallel_analysis)
152
167
  end
153
168
  end
154
-
169
+ if summary_map
170
+ s.section(:name=>_("MAP for %s") % name) do |s2|
171
+ s2.parse_element(map)
172
+ end
173
+ end
155
174
  end
156
175
  end
157
176
  end
@@ -14,6 +14,7 @@ module Statsample
14
14
  include Summarizable
15
15
  attr_reader :ds,:mean, :sd,:valid_n, :alpha , :alpha_standarized, :variances_mean, :covariances_mean, :cov_m
16
16
  attr_accessor :name
17
+ attr_accessor :summary_histogram
17
18
  def initialize(ds, opts=Hash.new)
18
19
  @dumped=ds.fields.find_all {|f|
19
20
  ds[f].variance==0
@@ -40,9 +41,13 @@ module Statsample
40
41
  @sd = @total.sd
41
42
  @variance=@total.variance
42
43
  @valid_n = @total.size
43
- opts_default={:name=>_("Reliability Analisis")}
44
+ opts_default={
45
+ :name=>_("Reliability Analisis"),
46
+ :summary_histogram=>true
47
+ }
44
48
  @opts=opts_default.merge(opts)
45
- @name=@opts[:name]
49
+ @opts.each{|k,v| self.send("#{k}=",v) if self.respond_to? k }
50
+
46
51
  @cov_m=Statsample::Bivariate.covariance_matrix(@ds)
47
52
  # Mean for covariances and variances
48
53
  @variances=@k.times.map {|i| @cov_m[i,i]}.to_scale
@@ -154,14 +159,13 @@ module Statsample
154
159
  t.row [_("Items"), @ods.fields.size]
155
160
  t.row [_("Sum mean"), "%0.4f" % @o_total.mean]
156
161
  t.row [_("S.d. mean"), "%0.4f" % @o_total.sd]
157
-
158
162
  end
159
-
160
163
  s.table(:name=>_("Deleted items"), :header=>['item','mean']) do |t|
161
164
  @dumped.each do |f|
162
165
  t.row(["#{@ods[f].name}(#{f})", "%0.5f" % @ods[f].mean])
163
166
  end
164
167
  end
168
+ s.parse_element(Statsample::Graph::Histogram.new(@o_total)) if @summary_histogram
165
169
  end
166
170
  end
167
171
 
@@ -225,7 +229,7 @@ module Statsample
225
229
  t.row row
226
230
  end # end each
227
231
  end # table
228
-
232
+ s.parse_element(Statsample::Graph::Histogram.new(@total)) if @summary_histogram
229
233
  end # section
230
234
  end # def
231
235
  end # class
@@ -18,7 +18,7 @@ module Statsample
18
18
  attr_accessor :summary_show_problematic_items
19
19
  def initialize(ds,key,opts=Hash.new)
20
20
  opts_default={
21
- :name=>_("Skill Scale Reliability Analysis"),
21
+ :name=>_("Skill Scale Reliability Analysis (%s)") % ds.name,
22
22
  :summary_minimal_item_correlation=>0.10,
23
23
  :summary_show_problematic_items=>true
24
24
  }
@@ -28,9 +28,17 @@ module Statsample
28
28
  @opts.each{|k,v| self.send("#{k}=",v) if self.respond_to? k }
29
29
  @cds=nil
30
30
  end
31
# Dataset with only the corrected vectors of the items listed in @key.
# Each vector is named after the original item plus "(corrected)" and
# keeps the labels of the original item; the dataset is named after
# the original dataset.
def corrected_dataset_minimal
  corrected = corrected_dataset
  item_names = @key.keys
  vectors = {}
  item_names.each { |item| vectors[item] = corrected[item] }
  minimal = vectors.to_dataset
  item_names.each do |item|
    minimal[item].name = _("%s(corrected)") % @ds[item].name
    minimal[item].labels = @ds[item].labels
  end

  minimal.name = _("Corrected dataset from %s") % @ds.name
  minimal
end
35
43
  def vector_sum
36
44
  corrected_dataset_minimal.vector_sum
@@ -74,11 +82,19 @@ module Statsample
74
82
  s.section(:name=>_("Problematic Items")) do |spi|
75
83
  count=0
76
84
  sa.item_total_correlation.each do |k,v|
77
- if v<summary_minimal_item_correlation
85
+ if v < summary_minimal_item_correlation
78
86
  count+=1
79
87
  spi.section(:name=>_("Item: %s") % @ds[k].name) do |spii|
80
88
  spii.text _("Correct answer: %s") % @key[k]
81
- spii.parse_element(@ds[k])
89
+ spii.text _("p: %0.3f") % corrected_dataset[k].mean
90
+ props=@ds[k].proportions.inject({}) {|ac,v| ac[v[0]] = v[1].to_f;ac}
91
+
92
+ spi.table(:name=>"Proportions",:header=>[_("Value"), _("%")]) do |table|
93
+ props.each do |k1,v|
94
+ table.row [ @ds[k].labeling(k1), "%0.3f" % v]
95
+ end
96
+ end
97
+
82
98
  end
83
99
  end
84
100
  end
@@ -91,12 +91,12 @@ module Statsample
91
91
  # Note: data, missing_values and labels are duplicated, so
92
92
  # changes to the original vector do not propagate to the copies.
93
93
  def dup
94
- Vector.new(@data.dup,@type, :missing_values => @missing_values.dup, :labels => @labels.dup, :name=>@name.dup)
94
+ Vector.new(@data.dup,@type, :missing_values => @missing_values.dup, :labels => @labels.dup, :name=>@name)
95
95
  end
96
96
  # Returns an empty duplicate of the vector. Maintains the type,
97
97
  # missing values and labels.
98
98
  def dup_empty
99
- Vector.new([],@type, :missing_values => @missing_values.dup, :labels => @labels.dup, :name=>@name.dup)
99
+ Vector.new([],@type, :missing_values => @missing_values.dup, :labels => @labels.dup, :name=> @name)
100
100
  end
101
101
  # Raises an exception if type of vector is inferior to t type
102
102
  def check_type(t)
@@ -105,31 +105,47 @@ module Statsample
105
105
  private :check_type
106
106
 
107
107
  # Return a vector using the standardized values for data
108
- # with sd with denominator n-1. With variance=0, returns nil
108
+ # with sd with denominator n-1. With variance=0 or mean nil,
109
+ # returns a vector of equal size full of nils
109
110
  #
110
111
 
111
112
  def vector_standarized(use_population=false)
112
113
  check_type :scale
114
+ return ([nil]*size).to_scale if mean.nil?
113
115
  m=mean
114
116
  sd=use_population ? sdp : sds
115
- return nil if sd==0.0
116
- @data_with_nils.collect{|x|
117
+ return ([nil]*size).to_scale if sd==0.0
118
+ vector=@data_with_nils.collect{|x|
117
119
  if !x.nil?
118
120
  (x.to_f - m).quo(sd)
119
121
  else
120
122
  nil
121
123
  end
122
124
  }.to_vector(:scale)
125
+ vector.name=_("%s(standarized)") % @name
126
+ vector
127
+ end
128
# Return a centered vector: every non-nil value minus the mean,
# with nils kept in place. The vector must be of type scale and the
# result is named after this vector plus "(centered)".
def vector_centered
  check_type :scale
  center = mean
  deviations = @data_with_nils.map do |value|
    value.to_f - center unless value.nil?
  end
  centered = deviations.to_scale
  centered.name = _("%s(centered)") % @name
  centered
end
124
138
 
125
139
  alias_method :standarized, :vector_standarized
140
+ alias_method :centered, :vector_centered
126
141
  # Return a vector with values replaced with the percentiles
127
142
  # of each values
128
143
  def vector_percentil
129
144
  check_type :ordinal
130
- c=size
131
- ranked.map {|i| (i.quo(c)*100).to_f }.to_vector(@type)
132
-
145
+ c=@valid_data.size
146
+ vector=ranked.map {|i| i.nil? ? nil : (i.quo(c)*100).to_f }.to_vector(@type)
147
+ vector.name=_("%s(percentil)") % @name
148
+ vector
133
149
  end
134
150
  def box_cox_transformation(lambda) # :nodoc:
135
151
  raise "Should be a scale" unless @type==:scale
@@ -181,17 +197,17 @@ module Statsample
181
197
  # If the parameter is defined, this value and lower
182
198
  # will be 0 and higher, 1
183
199
  def dichotomize(low=nil)
184
- fs=factors
185
- low||=factors.min
186
- @data_with_nils.collect{|x|
187
- if x.nil?
188
- nil
189
- elsif x>low
190
- 1
191
- else
192
- 0
193
- end
194
- }.to_scale
200
+ fs=factors
201
+ low||=factors.min
202
+ @data_with_nils.collect{|x|
203
+ if x.nil?
204
+ nil
205
+ elsif x>low
206
+ 1
207
+ else
208
+ 0
209
+ end
210
+ }.to_scale
195
211
  end
196
212
  # Iterate on each item.
197
213
  # Equivalent to
@@ -268,7 +284,7 @@ module Statsample
268
284
  # Retrieves label for value x. Retrieves x if
269
285
  # no label defined.
270
286
  def labeling(x)
271
- @labels.has_key?(x) ? @labels[x].to_s : x.to_s
287
+ @labels.has_key?(x) ? @labels[x].to_s : x.to_s
272
288
  end
273
289
  # Returns a Vector with data with labels replaced by the label.
274
290
  def vector_labeled
@@ -304,22 +320,22 @@ module Statsample
304
320
  # if update_valid = false, you should use
305
321
  # set_valid_data after all changes
306
322
  def missing_values=(vals)
307
- @missing_values = vals
308
- set_valid_data
323
+ @missing_values = vals
324
+ set_valid_data
309
325
  end
310
326
  # Set data considered as "today" on data vectors
311
327
  def today_values=(vals)
312
- @today_values = vals
313
- set_valid_data
328
+ @today_values = vals
329
+ set_valid_data
314
330
  end
315
331
  # Set level of measurement.
316
332
  def type=(t)
317
- @type=t
318
- set_scale_data if(t==:scale)
319
- set_date_data if (t==:date)
333
+ @type=t
334
+ set_scale_data if(t==:scale)
335
+ set_date_data if (t==:date)
320
336
  end
321
337
  def to_a
322
- @data.dup
338
+ @data.dup
323
339
  end
324
340
  alias_method :to_ary, :to_a
325
341
 
@@ -532,12 +548,12 @@ module Statsample
532
548
  # Ugly name. Really, create a Vector for standard 'matrix' package.
533
549
  # <tt>dir</tt> could be :horizontal or :vertical
534
550
  def to_matrix(dir=:horizontal)
535
- case dir
536
- when :horizontal
537
- Matrix[@data]
538
- when :vertical
539
- Matrix.columns([@data])
540
- end
551
+ case dir
552
+ when :horizontal
553
+ Matrix[@data]
554
+ when :vertical
555
+ Matrix.columns([@data])
556
+ end
541
557
  end
542
558
  def inspect
543
559
  self.to_s
@@ -553,22 +569,22 @@ module Statsample
553
569
  end
554
570
  end
555
571
  if Statsample::STATSAMPLE__.respond_to?(:frequencies)
556
- # Returns a hash with the distribution of frecuencies for
557
- # the sample
558
- def frequencies
559
- Statsample::STATSAMPLE__.frequencies(@valid_data)
560
- end
572
+ # Returns a hash with the distribution of frecuencies for
573
+ # the sample
574
+ def frequencies
575
+ Statsample::STATSAMPLE__.frequencies(@valid_data)
576
+ end
561
577
  else
562
- def frequencies #:nodoc:
563
- _frequencies
564
- end
578
+ def frequencies #:nodoc:
579
+ _frequencies
580
+ end
565
581
  end
566
582
  def _frequencies #:nodoc:
567
- @valid_data.inject(Hash.new) {|a,x|
568
- a[x]||=0
569
- a[x]=a[x]+1
570
- a
571
- }
583
+ @valid_data.inject(Hash.new) {|a,x|
584
+ a[x]||=0
585
+ a[x]=a[x]+1
586
+ a
587
+ }
572
588
  end
573
589
 
574
590
  # Returns the most frequent item.
@@ -812,8 +828,7 @@ module Statsample
812
828
  end
813
829
  def mean # :nodoc:
814
830
  check_type :scale
815
-
816
- @gsl.mean
831
+ @gsl.nil? ? nil : @gsl.mean
817
832
  end
818
833
  def variance_sample(m=nil) # :nodoc:
819
834
  check_type :scale
@@ -822,6 +837,7 @@ module Statsample
822
837
  end
823
838
  def standard_deviation_sample(m=nil) # :nodoc:
824
839
  check_type :scale
840
+ return nil if @gsl.nil?
825
841
  m||=mean
826
842
  @gsl.sd(m)
827
843
  end