statsample 1.4.0 → 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +14 -0
- data/Gemfile +1 -16
- data/History.txt +51 -46
- data/LICENSE.txt +7 -82
- data/README.md +145 -150
- data/Rakefile +20 -102
- data/lib/spss.rb +17 -14
- data/lib/statsample/crosstab.rb +2 -2
- data/lib/statsample/dataset.rb +82 -81
- data/lib/statsample/matrix.rb +43 -43
- data/lib/statsample/reliability.rb +1 -2
- data/lib/statsample/vector.rb +157 -124
- data/lib/statsample/version.rb +1 -1
- data/lib/statsample.rb +91 -91
- data/references.txt +2 -1
- data/statsample.gemspec +89 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_crosstab.rb +8 -0
- data/test/test_histogram.rb +7 -0
- data/test/test_vector.rb +62 -48
- metadata +109 -120
- data/.gemtest +0 -0
- data/Gemfile.lock +0 -78
- data/Manifest.txt +0 -157
- data/setup.rb +0 -1585
data/lib/statsample/dataset.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'statsample/vector'
|
2
2
|
|
3
3
|
class Hash
|
4
|
-
# Creates a Statsample::Dataset based on a Hash
|
4
|
+
# Creates a Statsample::Dataset based on a Hash
|
5
5
|
def to_dataset(*args)
|
6
6
|
Statsample::Dataset.new(self, *args)
|
7
7
|
end
|
@@ -29,15 +29,15 @@ module Statsample
|
|
29
29
|
m
|
30
30
|
end
|
31
31
|
end
|
32
|
-
# Set of cases with values for one or more variables,
|
32
|
+
# Set of cases with values for one or more variables,
|
33
33
|
# analog to a dataframe on R or a standard data file of SPSS.
|
34
34
|
# Every vector has <tt>#field</tt> name, which represent it. By default,
|
35
|
-
# the vectors are ordered by it field name, but you can change it
|
35
|
+
# the vectors are ordered by it field name, but you can change it
|
36
36
|
# the fields order manually.
|
37
37
|
# The Dataset works as a Hash, where keys are field names
|
38
|
-
# and values are Statsample::Vector
|
39
|
-
#
|
40
|
-
#
|
38
|
+
# and values are Statsample::Vector
|
39
|
+
#
|
40
|
+
#
|
41
41
|
# ==Usage
|
42
42
|
# Create an empty dataset:
|
43
43
|
# Dataset.new()
|
@@ -46,7 +46,7 @@ module Statsample
|
|
46
46
|
# Create a dataset with two vectors, called <tt>v1</tt>
|
47
47
|
# and <tt>v2</tt>:
|
48
48
|
# Dataset.new({'v1'=>%w{1 2 3}.to_vector, 'v2'=>%w{4 5 6}.to_vector})
|
49
|
-
# Create a dataset with two given vectors (v1 and v2),
|
49
|
+
# Create a dataset with two given vectors (v1 and v2),
|
50
50
|
# with vectors on inverted order:
|
51
51
|
# Dataset.new({'v2'=>v2,'v1'=>v1},['v2','v1'])
|
52
52
|
#
|
@@ -54,8 +54,8 @@ module Statsample
|
|
54
54
|
# field order as arguments
|
55
55
|
# v1 = [1,2,3].to_scale
|
56
56
|
# v2 = [1,2,3].to_scale
|
57
|
-
# ds = {'v1'=>v2, 'v2'=>v2}.to_dataset(%w{v2 v1})
|
58
|
-
|
57
|
+
# ds = {'v1'=>v2, 'v2'=>v2}.to_dataset(%w{v2 v1})
|
58
|
+
|
59
59
|
class Dataset
|
60
60
|
include Writable
|
61
61
|
include Summarizable
|
@@ -99,7 +99,7 @@ module Statsample
|
|
99
99
|
;a}
|
100
100
|
values.each_index{|i|
|
101
101
|
h_rows[rows[i]][columns[i]]=values[i]
|
102
|
-
}
|
102
|
+
}
|
103
103
|
ds=Dataset.new(["_id"]+cols_values)
|
104
104
|
cols_values.each{|c|
|
105
105
|
ds[c].type=values.type
|
@@ -121,15 +121,15 @@ module Statsample
|
|
121
121
|
end
|
122
122
|
# Return a nested hash using fields as keys and
|
123
123
|
# an array constructed of hashes with other values.
|
124
|
-
# If block provided, is used to provide the
|
125
|
-
# values, with parameters +row+ of dataset,
|
124
|
+
# If block provided, is used to provide the
|
125
|
+
# values, with parameters +row+ of dataset,
|
126
126
|
# +current+ last hash on hierarchy and
|
127
127
|
# +name+ of the key to include
|
128
128
|
def nest(*tree_keys,&block)
|
129
129
|
tree_keys=tree_keys[0] if tree_keys[0].is_a? Array
|
130
|
-
out=Hash.new
|
130
|
+
out=Hash.new
|
131
131
|
each do |row|
|
132
|
-
current=out
|
132
|
+
current=out
|
133
133
|
# Create tree
|
134
134
|
tree_keys[0,tree_keys.size-1].each do |f|
|
135
135
|
root=row[f]
|
@@ -162,7 +162,7 @@ module Statsample
|
|
162
162
|
@cases=0
|
163
163
|
@gsl=nil
|
164
164
|
@i=nil
|
165
|
-
|
165
|
+
|
166
166
|
if vectors.instance_of? Array
|
167
167
|
@fields=vectors.dup
|
168
168
|
@vectors=vectors.inject({}){|a,x| a[x]=Statsample::Vector.new(); a}
|
@@ -174,10 +174,10 @@ module Statsample
|
|
174
174
|
check_length
|
175
175
|
end
|
176
176
|
end
|
177
|
-
#
|
177
|
+
#
|
178
178
|
# Creates a copy of the given dataset, deleting all the cases with
|
179
179
|
# missing data on one of the vectors.
|
180
|
-
#
|
180
|
+
#
|
181
181
|
# @param array of fields to include. No value include all fields
|
182
182
|
#
|
183
183
|
def dup_only_valid(*fields_to_include)
|
@@ -201,12 +201,12 @@ module Statsample
|
|
201
201
|
ds
|
202
202
|
end
|
203
203
|
#
|
204
|
-
# Returns a duplicate of the Dataset.
|
204
|
+
# Returns a duplicate of the Dataset.
|
205
205
|
# All vectors are copied, so any modification on new
|
206
206
|
# dataset doesn't affect original dataset's vectors.
|
207
207
|
# If fields given as parameter, only include those vectors.
|
208
208
|
#
|
209
|
-
# @param array of fields to include. No value include all fields
|
209
|
+
# @param array of fields to include. No value include all fields
|
210
210
|
# @return {Statsample::Dataset}
|
211
211
|
def dup(*fields_to_include)
|
212
212
|
if fields_to_include.size==1 and fields_to_include[0].is_a? Array
|
@@ -224,15 +224,15 @@ module Statsample
|
|
224
224
|
ds.name= self.name
|
225
225
|
ds
|
226
226
|
end
|
227
|
-
|
228
|
-
|
227
|
+
|
228
|
+
|
229
229
|
# Returns an array with the fields from first argument to last argument
|
230
230
|
def from_to(from,to)
|
231
231
|
raise ArgumentError, "Field #{from} should be on dataset" if !@fields.include? from
|
232
232
|
raise ArgumentError, "Field #{to} should be on dataset" if !@fields.include? to
|
233
233
|
@fields.slice(@fields.index(from)..@fields.index(to))
|
234
234
|
end
|
235
|
-
|
235
|
+
|
236
236
|
# Returns (when possible) a cheap copy of dataset.
|
237
237
|
# If no vector have missing values, returns original vectors.
|
238
238
|
# If missing values presents, uses Dataset.dup_only_valid.
|
@@ -253,7 +253,7 @@ module Statsample
|
|
253
253
|
# Returns a shallow copy of Dataset.
|
254
254
|
# Object id will be distinct, but @vectors will be the same.
|
255
255
|
# @param array of fields to include. No value include all fields
|
256
|
-
# @return {Statsample::Dataset}
|
256
|
+
# @return {Statsample::Dataset}
|
257
257
|
def clone(*fields_to_include)
|
258
258
|
if fields_to_include.size==1 and fields_to_include[0].is_a? Array
|
259
259
|
fields_to_include=fields_to_include[0]
|
@@ -280,7 +280,7 @@ module Statsample
|
|
280
280
|
Dataset.new(vectors,@fields.dup)
|
281
281
|
end
|
282
282
|
# Merge vectors from two datasets
|
283
|
-
# In case of name collition, the vectors names are changed to
|
283
|
+
# In case of name collition, the vectors names are changed to
|
284
284
|
# x_1, x_2 ....
|
285
285
|
#
|
286
286
|
# @return {Statsample::Dataset}
|
@@ -354,7 +354,7 @@ module Statsample
|
|
354
354
|
# Generate a matrix, based on fields of dataset
|
355
355
|
#
|
356
356
|
# @return {::Matrix}
|
357
|
-
|
357
|
+
|
358
358
|
def collect_matrix
|
359
359
|
rows=@fields.collect{|row|
|
360
360
|
@fields.collect{|col|
|
@@ -363,7 +363,7 @@ module Statsample
|
|
363
363
|
}
|
364
364
|
Matrix.rows(rows)
|
365
365
|
end
|
366
|
-
|
366
|
+
|
367
367
|
# We have the same datasets if +vectors+ and +fields+ are the same
|
368
368
|
#
|
369
369
|
# @return {Boolean}
|
@@ -371,7 +371,7 @@ module Statsample
|
|
371
371
|
@vectors==d2.vectors and @fields==d2.fields
|
372
372
|
end
|
373
373
|
# Returns vector <tt>c</tt>
|
374
|
-
#
|
374
|
+
#
|
375
375
|
# @return {Statsample::Vector}
|
376
376
|
def col(c)
|
377
377
|
@vectors[c]
|
@@ -409,18 +409,18 @@ module Statsample
|
|
409
409
|
# Can only add one case and no error check is performed
|
410
410
|
# You SHOULD use #update_valid_data at the end of insertion cycle
|
411
411
|
#
|
412
|
-
#
|
412
|
+
#
|
413
413
|
def add_case_array(v)
|
414
414
|
v.each_index {|i| d=@vectors[@fields[i]].data; d.push(v[i])}
|
415
415
|
end
|
416
416
|
# Insert a case, using:
|
417
417
|
# * Array: size equal to number of vectors and values in the same order as fields
|
418
418
|
# * Hash: keys equal to fields
|
419
|
-
# If uvd is false, #update_valid_data is not executed after
|
420
|
-
# inserting a case. This is very useful if you want to increase the
|
421
|
-
# performance on inserting many cases, because #update_valid_data
|
419
|
+
# If uvd is false, #update_valid_data is not executed after
|
420
|
+
# inserting a case. This is very useful if you want to increase the
|
421
|
+
# performance on inserting many cases, because #update_valid_data
|
422
422
|
# performs check on vectors and on the dataset
|
423
|
-
|
423
|
+
|
424
424
|
def add_case(v,uvd=true)
|
425
425
|
case v
|
426
426
|
when Array
|
@@ -440,7 +440,7 @@ module Statsample
|
|
440
440
|
update_valid_data
|
441
441
|
end
|
442
442
|
end
|
443
|
-
# Check vectors and fields after inserting data. Use only
|
443
|
+
# Check vectors and fields after inserting data. Use only
|
444
444
|
# after #add_case_array or #add_case with second parameter to false
|
445
445
|
def update_valid_data
|
446
446
|
@gsl=nil
|
@@ -459,7 +459,7 @@ module Statsample
|
|
459
459
|
@vectors.delete(name)
|
460
460
|
end
|
461
461
|
end
|
462
|
-
|
462
|
+
|
463
463
|
def add_vectors_by_split_recode(name_,join='-',sep=Statsample::SPLIT_TOKEN)
|
464
464
|
split=@vectors[name_].split_by_separator(sep)
|
465
465
|
i=1
|
@@ -476,7 +476,7 @@ module Statsample
|
|
476
476
|
add_vector(name+join+k,v)
|
477
477
|
}
|
478
478
|
end
|
479
|
-
|
479
|
+
|
480
480
|
def vector_by_calculation(type=:scale)
|
481
481
|
a=[]
|
482
482
|
each do |row|
|
@@ -485,7 +485,7 @@ module Statsample
|
|
485
485
|
a.to_vector(type)
|
486
486
|
end
|
487
487
|
# Returns a vector with the sum of fields
|
488
|
-
# if fields parameter is empty, sum all fields
|
488
|
+
# if fields parameter is empty, sum all fields
|
489
489
|
def vector_sum(fields=nil)
|
490
490
|
fields||=@fields
|
491
491
|
vector=collect_with_index do |row, i|
|
@@ -504,7 +504,7 @@ module Statsample
|
|
504
504
|
raise "Fields #{(fields-@fields).join(", ")} doesn't exists on dataset" if (fields-@fields).size>0
|
505
505
|
fields
|
506
506
|
end
|
507
|
-
|
507
|
+
|
508
508
|
# Returns a vector with the numbers of missing values for a case
|
509
509
|
def vector_missing_values(fields=nil)
|
510
510
|
fields=check_fields(fields)
|
@@ -570,7 +570,7 @@ module Statsample
|
|
570
570
|
def each_vector # :yield: |key, vector|
|
571
571
|
@fields.each{|k| yield k, @vectors[k]}
|
572
572
|
end
|
573
|
-
|
573
|
+
|
574
574
|
if Statsample::STATSAMPLE__.respond_to?(:case_as_hash)
|
575
575
|
def case_as_hash(c) # :nodoc:
|
576
576
|
Statsample::STATSAMPLE__.case_as_hash(self,c)
|
@@ -598,7 +598,7 @@ module Statsample
|
|
598
598
|
def _case_as_array(c) # :nodoc:
|
599
599
|
@fields.collect {|x| @vectors[x][c]}
|
600
600
|
end
|
601
|
-
|
601
|
+
|
602
602
|
# Returns each case as a hash
|
603
603
|
def each
|
604
604
|
begin
|
@@ -613,7 +613,7 @@ module Statsample
|
|
613
613
|
raise DatasetException.new(self, e)
|
614
614
|
end
|
615
615
|
end
|
616
|
-
|
616
|
+
|
617
617
|
# Returns each case as hash and index
|
618
618
|
def each_with_index # :yield: |case, i|
|
619
619
|
begin
|
@@ -628,7 +628,7 @@ module Statsample
|
|
628
628
|
raise DatasetException.new(self, e)
|
629
629
|
end
|
630
630
|
end
|
631
|
-
|
631
|
+
|
632
632
|
# Returns each case as an array, coding missing values as nils
|
633
633
|
def each_array_with_nils
|
634
634
|
m=fields.size
|
@@ -702,7 +702,7 @@ module Statsample
|
|
702
702
|
}
|
703
703
|
@vectors[vector_name].set_valid_data
|
704
704
|
end
|
705
|
-
|
705
|
+
|
706
706
|
def crosstab(v1,v2,opts={})
|
707
707
|
Statsample::Crosstab.new(@vectors[v1], @vectors[v2],opts)
|
708
708
|
end
|
@@ -714,7 +714,7 @@ module Statsample
|
|
714
714
|
raise ArgumentError,"Should pass a Statsample::Vector"
|
715
715
|
end
|
716
716
|
end
|
717
|
-
# Return data as a matrix. Column are ordered by #fields and
|
717
|
+
# Return data as a matrix. Column are ordered by #fields and
|
718
718
|
# rows by order of insertion
|
719
719
|
def to_matrix
|
720
720
|
rows=[]
|
@@ -723,12 +723,12 @@ module Statsample
|
|
723
723
|
}
|
724
724
|
Matrix.rows(rows)
|
725
725
|
end
|
726
|
-
|
726
|
+
|
727
727
|
if Statsample.has_gsl?
|
728
728
|
def clear_gsl
|
729
729
|
@gsl=nil
|
730
730
|
end
|
731
|
-
|
731
|
+
|
732
732
|
def to_gsl
|
733
733
|
if @gsl.nil?
|
734
734
|
if cases.nil?
|
@@ -741,30 +741,31 @@ module Statsample
|
|
741
741
|
end
|
742
742
|
@gsl
|
743
743
|
end
|
744
|
-
|
744
|
+
|
745
745
|
end
|
746
|
-
|
746
|
+
|
747
747
|
# Return a correlation matrix for fields included as parameters.
|
748
748
|
# By default, uses all fields of dataset
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
end
|
755
|
-
Statsample::Bivariate.correlation_matrix(ds)
|
749
|
+
def correlation_matrix(fields = nil)
|
750
|
+
if fields
|
751
|
+
ds = clone(fields)
|
752
|
+
else
|
753
|
+
ds = self
|
756
754
|
end
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
755
|
+
Statsample::Bivariate.correlation_matrix(ds)
|
756
|
+
end
|
757
|
+
|
758
|
+
# Return a covariance matrix for fields included as parameters.
|
759
|
+
# By default, uses all fields of dataset
|
760
|
+
def covariance_matrix(fields = nil)
|
761
|
+
if fields
|
762
|
+
ds = clone(fields)
|
763
|
+
else
|
764
|
+
ds = self
|
766
765
|
end
|
767
|
-
|
766
|
+
Statsample::Bivariate.covariance_matrix(ds)
|
767
|
+
end
|
768
|
+
|
768
769
|
# Create a new dataset with all cases which the block returns true
|
769
770
|
def filter
|
770
771
|
ds=self.dup_empty
|
@@ -775,7 +776,7 @@ module Statsample
|
|
775
776
|
ds.name=_("%s(filtered)") % @name
|
776
777
|
ds
|
777
778
|
end
|
778
|
-
|
779
|
+
|
779
780
|
# creates a new vector with the data of a given field which the block returns true
|
780
781
|
def filter_field(field)
|
781
782
|
a=[]
|
@@ -784,11 +785,11 @@ module Statsample
|
|
784
785
|
end
|
785
786
|
a.to_vector(@vectors[field].type)
|
786
787
|
end
|
787
|
-
|
788
|
+
|
788
789
|
# Creates a Statsample::Multiset, using one or more fields
|
789
790
|
# to split the dataset.
|
790
|
-
|
791
|
-
|
791
|
+
|
792
|
+
|
792
793
|
def to_multiset_by_split(*fields)
|
793
794
|
require 'statsample/multiset'
|
794
795
|
if fields.size==1
|
@@ -798,7 +799,7 @@ module Statsample
|
|
798
799
|
end
|
799
800
|
end
|
800
801
|
# Creates a Statsample::Multiset, using one field
|
801
|
-
|
802
|
+
|
802
803
|
def to_multiset_by_split_one_field(field)
|
803
804
|
raise ArgumentError,"Should use a correct field name" if !@fields.include? field
|
804
805
|
factors=@vectors[field].factors
|
@@ -815,7 +816,7 @@ module Statsample
|
|
815
816
|
v1.type=@vectors[k1].type
|
816
817
|
v1.name=@vectors[k1].name
|
817
818
|
v1.labels=@vectors[k1].labels
|
818
|
-
|
819
|
+
|
819
820
|
}
|
820
821
|
}
|
821
822
|
ms
|
@@ -838,7 +839,7 @@ module Statsample
|
|
838
839
|
|
839
840
|
p1=eval "Proc.new {|c| ms[["+fields.collect{|f| "c['#{f}']"}.join(",")+"]].add_case(c,false) }"
|
840
841
|
each{|c| p1.call(c)}
|
841
|
-
|
842
|
+
|
842
843
|
ms.datasets.each do |k,ds|
|
843
844
|
ds.update_valid_data
|
844
845
|
ds.name=fields.size.times.map {|i|
|
@@ -846,15 +847,15 @@ module Statsample
|
|
846
847
|
sk=k[i]
|
847
848
|
@vectors[f].labeling(sk)
|
848
849
|
}.join("-")
|
849
|
-
ds.vectors.each{|k1,v1|
|
850
|
+
ds.vectors.each{|k1,v1|
|
850
851
|
v1.type=@vectors[k1].type
|
851
852
|
v1.name=@vectors[k1].name
|
852
853
|
v1.labels=@vectors[k1].labels
|
853
|
-
|
854
|
+
|
854
855
|
}
|
855
856
|
end
|
856
857
|
ms
|
857
|
-
|
858
|
+
|
858
859
|
end
|
859
860
|
# Returns a vector, based on a string with a calculation based
|
860
861
|
# on vector
|
@@ -923,14 +924,14 @@ module Statsample
|
|
923
924
|
end
|
924
925
|
# Creates a new dataset for one to many relations
|
925
926
|
# on a dataset, based on pattern of field names.
|
926
|
-
#
|
927
|
+
#
|
927
928
|
# for example, you have a survey for number of children
|
928
929
|
# with this structure:
|
929
930
|
# id, name, child_name_1, child_age_1, child_name_2, child_age_2
|
930
|
-
# with
|
931
|
+
# with
|
931
932
|
# ds.one_to_many(%w{id}, "child_%v_%n")
|
932
933
|
# the field of first parameters will be copied verbatim
|
933
|
-
# to new dataset, and fields which responds to second
|
934
|
+
# to new dataset, and fields which responds to second
|
934
935
|
# pattern will be added one case for each different %n.
|
935
936
|
# For example
|
936
937
|
# cases=[
|
@@ -942,13 +943,13 @@ module Statsample
|
|
942
943
|
# cases.each {|c| ds.add_case_array c }
|
943
944
|
# ds.one_to_many(['id'],'car_%v%n').to_matrix
|
944
945
|
# => Matrix[
|
945
|
-
# ["red", "1", 10],
|
946
|
+
# ["red", "1", 10],
|
946
947
|
# ["blue", "1", 20],
|
947
948
|
# ["green", "2", 15],
|
948
949
|
# ["orange", "2", 30],
|
949
950
|
# ["white", "2", 20]
|
950
951
|
# ]
|
951
|
-
#
|
952
|
+
#
|
952
953
|
def one_to_many(parent_fields, pattern)
|
953
954
|
#base_pattern=pattern.gsub(/%v|%n/,"")
|
954
955
|
re=Regexp.new pattern.gsub("%v","(.+?)").gsub("%n","(\\d+?)")
|
@@ -962,7 +963,7 @@ module Statsample
|
|
962
963
|
@fields.each do |f|
|
963
964
|
if f=~re
|
964
965
|
if !vars.include? $1
|
965
|
-
vars.push($1)
|
966
|
+
vars.push($1)
|
966
967
|
h[$1]=Statsample::Vector.new([], @vectors[f].type)
|
967
968
|
end
|
968
969
|
max_n=$2.to_i if max_n < $2.to_i
|
@@ -986,7 +987,7 @@ module Statsample
|
|
986
987
|
row_out["_col_id"]=n
|
987
988
|
ds.add_case(row_out,false)
|
988
989
|
end
|
989
|
-
|
990
|
+
|
990
991
|
end
|
991
992
|
end
|
992
993
|
ds.update_valid_data
|
data/lib/statsample/matrix.rb
CHANGED
@@ -27,14 +27,14 @@ class ::Matrix
|
|
27
27
|
if defined? :eigenpairs
|
28
28
|
alias_method :eigenpairs_ruby, :eigenpairs
|
29
29
|
end
|
30
|
-
|
30
|
+
|
31
31
|
if Statsample.has_gsl?
|
32
32
|
# Optimize eigenpairs of extendmatrix module using gsl
|
33
33
|
def eigenpairs
|
34
34
|
to_gsl.eigenpairs
|
35
35
|
end
|
36
36
|
end
|
37
|
-
|
37
|
+
|
38
38
|
def eigenvalues
|
39
39
|
eigenpairs.collect {|v| v[0]}
|
40
40
|
end
|
@@ -44,11 +44,11 @@ class ::Matrix
|
|
44
44
|
def eigenvectors_matrix
|
45
45
|
Matrix.columns(eigenvectors)
|
46
46
|
end
|
47
|
-
|
48
|
-
|
49
47
|
|
50
|
-
|
51
|
-
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
|
52
52
|
def to_gsl
|
53
53
|
out=[]
|
54
54
|
self.row_size.times{|i|
|
@@ -76,7 +76,7 @@ module GSL
|
|
76
76
|
def to_gsl
|
77
77
|
self
|
78
78
|
end
|
79
|
-
|
79
|
+
|
80
80
|
def to_dataset
|
81
81
|
f = (self.respond_to? :fields_y) ? fields_y : column_size.times.map {|i| _("VAR_%d") % (i+1) }
|
82
82
|
ds=Statsample::Dataset.new(f)
|
@@ -91,7 +91,7 @@ module GSL
|
|
91
91
|
ds.name=self.name if self.respond_to? :name
|
92
92
|
ds
|
93
93
|
end
|
94
|
-
|
94
|
+
|
95
95
|
def row_size
|
96
96
|
size1
|
97
97
|
end
|
@@ -110,18 +110,18 @@ module GSL
|
|
110
110
|
def eigenvectors
|
111
111
|
eigenpairs.collect {|v| v[1]}
|
112
112
|
end
|
113
|
-
|
113
|
+
|
114
114
|
# Matrix sum of squares
|
115
115
|
def mssq
|
116
116
|
sum=0
|
117
117
|
to_v.each {|i| sum+=i**2}
|
118
118
|
sum
|
119
119
|
end
|
120
|
-
|
120
|
+
|
121
121
|
def eigenvectors_matrix
|
122
122
|
eigval, eigvec= GSL::Eigen.symmv(self)
|
123
123
|
GSL::Eigen::symmv_sort(eigval, eigvec, GSL::Eigen::SORT_VAL_DESC)
|
124
|
-
eigvec
|
124
|
+
eigvec
|
125
125
|
end
|
126
126
|
def eigenpairs
|
127
127
|
eigval, eigvec= GSL::Eigen.symmv(self)
|
@@ -130,7 +130,7 @@ module GSL
|
|
130
130
|
[eigval[i],eigvec.get_col(i)]
|
131
131
|
}
|
132
132
|
end
|
133
|
-
|
133
|
+
|
134
134
|
#def eigenpairs_ruby
|
135
135
|
# self.to_matrix.eigenpairs_ruby
|
136
136
|
#end
|
@@ -158,7 +158,7 @@ end
|
|
158
158
|
module Statsample
|
159
159
|
# Module to add names to X and Y fields
|
160
160
|
module NamedMatrix
|
161
|
-
include Summarizable
|
161
|
+
include Summarizable
|
162
162
|
|
163
163
|
def fields
|
164
164
|
raise "Should be square" if !square?
|
@@ -178,10 +178,10 @@ module Statsample
|
|
178
178
|
@fields_y=v
|
179
179
|
end
|
180
180
|
def fields_x
|
181
|
-
@fields_x||=row_size.times.collect {|i| _("X%d") % i}
|
181
|
+
@fields_x||=row_size.times.collect {|i| _("X%d") % i}
|
182
182
|
end
|
183
183
|
def fields_y
|
184
|
-
@fields_y||=column_size.times.collect {|i| _("Y%d") % i}
|
184
|
+
@fields_y||=column_size.times.collect {|i| _("Y%d") % i}
|
185
185
|
end
|
186
186
|
|
187
187
|
def name
|
@@ -195,13 +195,13 @@ module Statsample
|
|
195
195
|
@@named_matrix+=1
|
196
196
|
_("Matrix %d") % @@named_matrix
|
197
197
|
end
|
198
|
-
|
198
|
+
|
199
199
|
end
|
200
200
|
# Module to add method for variance/covariance and correlation matrices
|
201
201
|
# == Usage
|
202
202
|
# matrix=Matrix[[1,2],[2,3]]
|
203
203
|
# matrix.extend CovariateMatrix
|
204
|
-
#
|
204
|
+
#
|
205
205
|
module CovariateMatrix
|
206
206
|
include NamedMatrix
|
207
207
|
@@covariatematrix=0
|
@@ -217,7 +217,7 @@ module Statsample
|
|
217
217
|
else
|
218
218
|
@type
|
219
219
|
end
|
220
|
-
|
220
|
+
|
221
221
|
end
|
222
222
|
def _type=(t)
|
223
223
|
@type=t
|
@@ -233,7 +233,7 @@ module Statsample
|
|
233
233
|
end
|
234
234
|
}
|
235
235
|
})
|
236
|
-
matrix.extend CovariateMatrix
|
236
|
+
matrix.extend CovariateMatrix
|
237
237
|
matrix.fields_x=fields_x
|
238
238
|
matrix.fields_y=fields_y
|
239
239
|
matrix._type=:correlation
|
@@ -242,19 +242,19 @@ module Statsample
|
|
242
242
|
self
|
243
243
|
end
|
244
244
|
end
|
245
|
-
|
246
|
-
|
245
|
+
|
246
|
+
|
247
247
|
# Get variance for field k
|
248
|
-
#
|
248
|
+
#
|
249
249
|
def variance(k)
|
250
250
|
submatrix([k])[0,0]
|
251
251
|
end
|
252
|
-
|
252
|
+
|
253
253
|
def get_new_name
|
254
254
|
@@covariatematrix+=1
|
255
255
|
_("Covariate matrix %d") % @@covariatematrix
|
256
256
|
end
|
257
|
-
|
257
|
+
|
258
258
|
# Select a submatrix of factors. If you have a correlation matrix
|
259
259
|
# with a, b and c, you could obtain a submatrix of correlations of
|
260
260
|
# a and b, b and c or a and c
|
@@ -264,7 +264,7 @@ module Statsample
|
|
264
264
|
#
|
265
265
|
# Example:
|
266
266
|
# a=Matrix[[1.0, 0.3, 0.2],
|
267
|
-
# [0.3, 1.0, 0.5],
|
267
|
+
# [0.3, 1.0, 0.5],
|
268
268
|
# [0.2, 0.5, 1.0]]
|
269
269
|
# a.extend CovariateMatrix
|
270
270
|
# a.fields=%w{a b c}
|
@@ -272,31 +272,31 @@ module Statsample
|
|
272
272
|
# => Matrix[[0.5],[0.3]]
|
273
273
|
# a.submatrix(%w{c a})
|
274
274
|
# => Matrix[[1.0, 0.2] , [0.2, 1.0]]
|
275
|
-
def submatrix(rows,columns=nil)
|
276
|
-
raise ArgumentError, "rows shouldn't be empty" if rows.respond_to? :size and rows.size==0
|
277
|
-
columns||=rows
|
275
|
+
def submatrix(rows,columns = nil)
|
276
|
+
raise ArgumentError, "rows shouldn't be empty" if rows.respond_to? :size and rows.size == 0
|
277
|
+
columns ||= rows
|
278
278
|
# Convert all fields on index
|
279
|
-
row_index=rows.collect
|
280
|
-
r=v.is_a?(Numeric) ? v : fields_x.index(v)
|
279
|
+
row_index = rows.collect do |v|
|
280
|
+
r = v.is_a?(Numeric) ? v : fields_x.index(v)
|
281
281
|
raise "Index #{v} doesn't exists on matrix" if r.nil?
|
282
282
|
r
|
283
|
-
|
284
|
-
|
285
|
-
|
283
|
+
end
|
284
|
+
|
285
|
+
column_index = columns.collect do |v|
|
286
|
+
r = v.is_a?(Numeric) ? v : fields_y.index(v)
|
286
287
|
raise "Index #{v} doesn't exists on matrix" if r.nil?
|
287
288
|
r
|
288
|
-
|
289
|
-
|
290
|
-
|
289
|
+
end
|
290
|
+
|
291
|
+
|
291
292
|
fx=row_index.collect {|v| fields_x[v]}
|
292
293
|
fy=column_index.collect {|v| fields_y[v]}
|
293
|
-
|
294
|
-
matrix= Matrix.rows(row_index.collect {|i|
|
295
|
-
|
296
|
-
matrix.
|
297
|
-
matrix.
|
298
|
-
matrix.
|
299
|
-
matrix._type=_type
|
294
|
+
|
295
|
+
matrix = Matrix.rows(row_index.collect { |i| column_index.collect { |j| self[i, j] }})
|
296
|
+
matrix.extend CovariateMatrix
|
297
|
+
matrix.fields_x = fx
|
298
|
+
matrix.fields_y = fy
|
299
|
+
matrix._type = _type
|
300
300
|
matrix
|
301
301
|
end
|
302
302
|
def report_building(generator)
|