statsample 1.4.0 → 1.4.1
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- checksums.yaml +4 -4
- data/.gitignore +14 -0
- data/Gemfile +1 -16
- data/History.txt +51 -46
- data/LICENSE.txt +7 -82
- data/README.md +145 -150
- data/Rakefile +20 -102
- data/lib/spss.rb +17 -14
- data/lib/statsample/crosstab.rb +2 -2
- data/lib/statsample/dataset.rb +82 -81
- data/lib/statsample/matrix.rb +43 -43
- data/lib/statsample/reliability.rb +1 -2
- data/lib/statsample/vector.rb +157 -124
- data/lib/statsample/version.rb +1 -1
- data/lib/statsample.rb +91 -91
- data/references.txt +2 -1
- data/statsample.gemspec +89 -0
- data/test/test_awesome_print_bug.rb +16 -0
- data/test/test_crosstab.rb +8 -0
- data/test/test_histogram.rb +7 -0
- data/test/test_vector.rb +62 -48
- metadata +109 -120
- data/.gemtest +0 -0
- data/Gemfile.lock +0 -78
- data/Manifest.txt +0 -157
- data/setup.rb +0 -1585
data/lib/statsample/dataset.rb
CHANGED
Nearly every hunk in this file is a whitespace-only cleanup: trailing whitespace is stripped from blank lines and from comment and code lines in Hash#to_dataset, in the Dataset class header comment, and in the doc comments around #nest, #dup_only_valid, #dup, #clone, #add_case, #vector_sum, #to_multiset_by_split and #one_to_many, among others. The visible text of those lines is unchanged.

The one hunk that goes beyond whitespace (@@ -741,30 +741,31 @@) rewrites the correlation_matrix and covariance_matrix helpers; in 1.4.1 they read:

    # Return a correlation matrix for fields included as parameters.
    # By default, uses all fields of dataset
    def correlation_matrix(fields = nil)
      if fields
        ds = clone(fields)
      else
        ds = self
      end
      Statsample::Bivariate.correlation_matrix(ds)
    end

    # Return a correlation matrix for fields included as parameters.
    # By default, uses all fields of dataset
    def covariance_matrix(fields = nil)
      if fields
        ds = clone(fields)
      else
        ds = self
      end
      Statsample::Bivariate.covariance_matrix(ds)
    end

Both helpers delegate to Statsample::Bivariate and take an optional list of fields: when fields are given, the calculation runs on clone(fields); otherwise it runs on the whole dataset.
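For orientation, here is a minimal usage sketch of the two helpers shown above. It assumes statsample 1.4.1 is installed; the vectors v1, v2 and v3 and their values are invented for illustration, while to_scale, Hash#to_dataset and the optional fields argument come from the code and comments in this diff.

    require 'statsample'

    # Invented example data; to_scale and Hash#to_dataset are the helpers
    # documented in the dataset.rb comments above.
    v1 = [1, 2, 3, 4, 5].to_scale
    v2 = [2, 4, 5, 8, 9].to_scale
    v3 = [9, 7, 6, 4, 2].to_scale
    ds = { 'v1' => v1, 'v2' => v2, 'v3' => v3 }.to_dataset

    cor = ds.correlation_matrix              # all fields
    cov = ds.covariance_matrix               # all fields
    sub = ds.correlation_matrix(%w{v1 v3})   # only v1 and v3, via clone(fields)

    puts cor, cov, sub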
data/lib/statsample/matrix.rb
CHANGED
As in dataset.rb, most hunks here only strip trailing whitespace from blank lines, comments and the occasional code line: around the eigenpairs/eigenvalues/eigenvectors_matrix/to_gsl patches on ::Matrix, the GSL::Matrix #to_dataset, #row_size and #mssq extensions, the NamedMatrix fields_x/fields_y defaults, and the CovariateMatrix doc comments. The visible text of those lines is unchanged.

The substantive change is a rewrite of CovariateMatrix#submatrix (@@ -272,31 +272,31 @@), which in 1.4.1 reads:

    def submatrix(rows,columns = nil)
      raise ArgumentError, "rows shouldn't be empty" if rows.respond_to? :size and rows.size == 0
      columns ||= rows
      # Convert all fields on index
      row_index = rows.collect do |v|
        r = v.is_a?(Numeric) ? v : fields_x.index(v)
        raise "Index #{v} doesn't exists on matrix" if r.nil?
        r
      end

      column_index = columns.collect do |v|
        r = v.is_a?(Numeric) ? v : fields_y.index(v)
        raise "Index #{v} doesn't exists on matrix" if r.nil?
        r
      end

      fx=row_index.collect {|v| fields_x[v]}
      fy=column_index.collect {|v| fields_y[v]}

      matrix = Matrix.rows(row_index.collect { |i| column_index.collect { |j| self[i, j] }})
      matrix.extend CovariateMatrix
      matrix.fields_x = fx
      matrix.fields_y = fy
      matrix._type = _type
      matrix
    end

Row and column selectors may be given as field names or numeric indices; the rewritten method collects both index lists with explicit do...end blocks, builds the submatrix with Matrix.rows, extends it with CovariateMatrix, and carries over fields_x, fields_y and _type.
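And a usage sketch for the rewritten submatrix, reusing the 3x3 correlation matrix from the CovariateMatrix doc comment quoted in this diff (statsample 1.4.1 assumed; the second call and its expected result are added here for illustration):

    require 'statsample'

    # The example matrix and field names come from the CovariateMatrix doc comment.
    a = Matrix[[1.0, 0.3, 0.2],
               [0.3, 1.0, 0.5],
               [0.2, 0.5, 1.0]]
    a.extend Statsample::CovariateMatrix
    a.fields = %w{a b c}

    a.submatrix(%w{c a})
    # => Matrix[[1.0, 0.2], [0.2, 1.0]]   (as in the doc comment)
    a.submatrix(%w{a b}, %w{c})
    # => Matrix[[0.2], [0.5]]             (rows a and b against column c)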