statsample-optimization 2.0.3-x86-linux
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +8 -0
- data/Manifest.txt +7 -0
- data/README.txt +30 -0
- data/Rakefile +23 -0
- data/ext/statsamplert/extconf.rb +3 -0
- data/ext/statsamplert/statsamplert.c +130 -0
- data/lib/statsamplert.so +0 -0
- data/test/test_statsample_optimization.rb +43 -0
- metadata +160 -0
data/History.txt
ADDED
data/Manifest.txt
ADDED
data/README.txt
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
= Statsample (optimization package)
|
2
|
+
|
3
|
+
http://rubyforge.org/projects/ruby-statsample/
|
4
|
+
|
5
|
+
== DESCRIPTION:
|
6
|
+
|
7
|
+
Installs gsl and statistics2, and provides a C extension that optimizes the following methods:
|
8
|
+
|
9
|
+
* Vector#frequencies
|
10
|
+
* Vector#set_valid_data
|
11
|
+
* Dataset#case_as_hash
|
12
|
+
* Dataset#case_as_array
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
== SYNOPSIS:
|
17
|
+
Just require statsample. The module detects and uses the extension.
|
18
|
+
|
19
|
+
|
20
|
+
== REQUIREMENTS:
|
21
|
+
|
22
|
+
* Statsample
|
23
|
+
|
24
|
+
== INSTALL:
|
25
|
+
|
26
|
+
sudo gem install statsample-optimization
|
27
|
+
|
28
|
+
== LICENSE:
|
29
|
+
|
30
|
+
GPL-2
|
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
# -*- ruby -*-
|
3
|
+
|
4
|
+
require 'rubygems'
require 'rake/extensiontask'
require 'hoe'

Hoe.plugin :git

# The test task depends on the compiled extension being present.
task "test" => ["lib/statsamplert.so"]

# Gem definition; the returned Hoe object's spec is reused by the
# extension task below.
hoe = Hoe.spec 'statsample-optimization' do
  self.version = "2.0.3"
  self.spec_extras[:extensions] = ["ext/statsamplert/extconf.rb"]
  self.rubyforge_name = 'ruby-statsample'
  self.developer('Claudio Bustos', 'clbustos_at_gmail.com')

  # Dependencies, appended in the same order as before.
  [
    ["statsample", "~>0.12.0"],
    ["statistics2", "~>0.54"],
    ["gsl", "~>1.12.109"],
    ["rake-compiler"]
  ].each { |dep| self.extra_deps << dep }
end

# Compile task for the statsamplert C extension.
Rake::ExtensionTask.new('statsamplert', hoe.spec)
|
21
|
+
|
22
|
+
|
23
|
+
# vim: syntax=ruby
|
@@ -0,0 +1,130 @@
|
|
1
|
+
#include <ruby.h>
|
2
|
+
/**
|
3
|
+
* :stopdoc:
|
4
|
+
*/
|
5
|
+
void Init_statsamplert();
|
6
|
+
VALUE statsample_frequencies(VALUE self, VALUE data);
|
7
|
+
VALUE statsample_set_valid_data_intern(VALUE self, VALUE vector);
|
8
|
+
VALUE statsample_case_as_hash(VALUE self, VALUE ds, VALUE index);
|
9
|
+
VALUE statsample_case_as_array(VALUE self, VALUE ds, VALUE index);
|
10
|
+
void Init_statsamplert()
|
11
|
+
{
|
12
|
+
VALUE mStatsample;
|
13
|
+
VALUE mSTATSAMPLE__;
|
14
|
+
ID id_statsample;
|
15
|
+
ID id_STATSAMPLE__;
|
16
|
+
id_statsample = rb_intern("Statsample");
|
17
|
+
id_STATSAMPLE__ = rb_intern("STATSAMPLE__");
|
18
|
+
|
19
|
+
if(rb_const_defined(rb_cObject, id_statsample)) {
|
20
|
+
mStatsample = rb_const_get(rb_cObject, id_statsample);
|
21
|
+
} else {
|
22
|
+
mStatsample = rb_define_module("Statsample");
|
23
|
+
}
|
24
|
+
if(rb_const_defined(mStatsample, id_STATSAMPLE__)) {
|
25
|
+
mSTATSAMPLE__ = rb_const_get(mStatsample, id_STATSAMPLE__);
|
26
|
+
} else {
|
27
|
+
mSTATSAMPLE__ = rb_define_module_under(mStatsample, "STATSAMPLE__");
|
28
|
+
}
|
29
|
+
|
30
|
+
rb_define_const(mStatsample, "OPTIMIZED",Qtrue);
|
31
|
+
rb_define_module_function(mSTATSAMPLE__,"frequencies",statsample_frequencies,1);
|
32
|
+
rb_define_module_function(mSTATSAMPLE__,"set_valid_data_intern", statsample_set_valid_data_intern, 1);
|
33
|
+
rb_define_module_function(mSTATSAMPLE__,"case_as_hash",statsample_case_as_hash,2);
|
34
|
+
rb_define_module_function(mSTATSAMPLE__,"case_as_array",statsample_case_as_array,2);
|
35
|
+
|
36
|
+
}
|
37
|
+
|
38
|
+
/*
 * Classifies every element of vector's @data as valid or missing.
 *
 * Emulates the following Ruby code, where a value is treated as missing
 * when it is nil or is included in @missing_values:
 *
 *   @data.each do |n|
 *     if is_valid? n
 *       @valid_data.push(n)
 *       @data_with_nils.push(n)
 *     else
 *       @data_with_nils.push(nil)
 *       @missing_data.push(n)
 *     end
 *   end
 *   @has_missing_data=@missing_data.size>0
 *
 * The target arrays are appended to, not cleared.
 *
 * vector: object holding @data, @valid_data, @data_with_nils,
 *         @missing_data and @missing_values.
 * Returns nil.
 * Raises TypeError when any of those instance variables is not an Array.
 */
VALUE statsample_set_valid_data_intern(VALUE self, VALUE vector) {
    VALUE data = rb_iv_get(vector, "@data");
    VALUE valid_data = rb_iv_get(vector, "@valid_data");
    VALUE data_with_nils = rb_iv_get(vector, "@data_with_nils");
    VALUE missing_data = rb_iv_get(vector, "@missing_data");
    VALUE missing_values = rb_iv_get(vector, "@missing_values");
    VALUE val;
    long len;
    long i;

    /* The array macros and functions below assume real Arrays; fail with a
     * Ruby exception instead of reading invalid memory. */
    Check_Type(data, T_ARRAY);
    Check_Type(valid_data, T_ARRAY);
    Check_Type(data_with_nils, T_ARRAY);
    Check_Type(missing_data, T_ARRAY);
    Check_Type(missing_values, T_ARRAY);

    len = RARRAY_LEN(data);
    for (i = 0; i < len; i++) {
        val = rb_ary_entry(data, i);
        if (NIL_P(val) || rb_ary_includes(missing_values, val)) {
            rb_ary_push(missing_data, val);
            rb_ary_push(data_with_nils, Qnil);
        } else {
            rb_ary_push(valid_data, val);
            rb_ary_push(data_with_nils, val);
        }
    }
    rb_iv_set(vector, "@has_missing_data", (RARRAY_LEN(missing_data) > 0) ? Qtrue : Qfalse);
    return Qnil;
}
|
74
|
+
/**
|
75
|
+
* Retuns frequencies for an array as a hash, with
|
76
|
+
* keys as items and values as number of items
|
77
|
+
*/
|
78
|
+
VALUE statsample_frequencies(VALUE self, VALUE data) {
|
79
|
+
VALUE h;
|
80
|
+
VALUE val;
|
81
|
+
long len;
|
82
|
+
long i;
|
83
|
+
|
84
|
+
Check_Type(data,T_ARRAY);
|
85
|
+
h=rb_hash_new();
|
86
|
+
|
87
|
+
len=RARRAY_LEN(data);
|
88
|
+
for(i=0;i<len;i++) {
|
89
|
+
val=rb_ary_entry(data,i);
|
90
|
+
if(rb_hash_aref(h,val)==Qnil) {
|
91
|
+
rb_hash_aset(h,val,INT2FIX(1));
|
92
|
+
} else {
|
93
|
+
long antiguo=FIX2LONG(rb_hash_aref(h,val));
|
94
|
+
rb_hash_aset(h,val,LONG2FIX(antiguo+1));
|
95
|
+
}
|
96
|
+
}
|
97
|
+
return h;
|
98
|
+
}
|
99
|
+
|
100
|
+
/*
 * Builds one case (row) of a dataset as a hash.
 *
 * For every key in ds's @fields, looks up the matching vector in
 * @vectors and stores the element of that vector's @data at position
 * index under the key.
 *
 * ds:    object holding @fields (Array) and @vectors (Hash).
 * index: Integer position of the case.
 * Returns a new Hash of field => value.
 * Raises TypeError when @fields, @vectors or a vector's @data has an
 * unexpected type, or when index cannot be converted to an integer.
 */
VALUE statsample_case_as_hash(VALUE self, VALUE ds,VALUE index) {
    VALUE vector, data, key, h;
    VALUE fields = rb_iv_get(ds, "@fields");
    VALUE vectors = rb_iv_get(ds, "@vectors");
    long len;
    long idx;
    long i;

    Check_Type(fields, T_ARRAY);
    Check_Type(vectors, T_HASH);
    /* The index is the same for every field, so convert it once. */
    idx = NUM2LONG(index);

    h = rb_hash_new();
    len = RARRAY_LEN(fields);
    for (i = 0; i < len; i++) {
        key = rb_ary_entry(fields, i);
        vector = rb_hash_aref(vectors, key);
        data = rb_iv_get(vector, "@data");
        /* Reject anything that is not an Array before indexing it. */
        Check_Type(data, T_ARRAY);
        rb_hash_aset(h, key, rb_ary_entry(data, idx));
    }
    return h;
}
|
115
|
+
/*
 * Builds one case (row) of a dataset as an array.
 *
 * For every key in ds's @fields, in order, looks up the matching vector
 * in @vectors and appends the element of that vector's @data at position
 * index.
 *
 * ds:    object holding @fields (Array) and @vectors (Hash).
 * index: Integer position of the case.
 * Returns a new Array with one value per field.
 * Raises TypeError when @fields, @vectors or a vector's @data has an
 * unexpected type, or when index cannot be converted to an integer.
 */
VALUE statsample_case_as_array(VALUE self, VALUE ds, VALUE index) {
    VALUE vector, data, key, ar;
    VALUE fields = rb_iv_get(ds, "@fields");
    VALUE vectors = rb_iv_get(ds, "@vectors");
    long len;
    long idx;
    long i;

    Check_Type(fields, T_ARRAY);
    Check_Type(vectors, T_HASH);
    /* The index is the same for every field, so convert it once. */
    idx = NUM2LONG(index);

    ar = rb_ary_new();
    len = RARRAY_LEN(fields);
    for (i = 0; i < len; i++) {
        key = rb_ary_entry(fields, i);
        vector = rb_hash_aref(vectors, key);
        data = rb_iv_get(vector, "@data");
        /* Reject anything that is not an Array before indexing it. */
        Check_Type(data, T_ARRAY);
        rb_ary_push(ar, rb_ary_entry(data, idx));
    }
    return ar;
}
|
130
|
+
|
data/lib/statsamplert.so
ADDED
Binary file
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# Make the gem's lib directory loadable; it is a sibling of this test
# directory, so step one level up first.
$:.unshift(File.dirname(__FILE__)+"/../lib")
require "test/unit"
# The extension is loaded before statsample itself.
# NOTE(review): presumably so statsample can detect Statsample::OPTIMIZED
# while loading - confirm before reordering.
require "statsamplert"
require "statsample"
|
6
|
+
# Tests for the C implementations registered under Statsample::STATSAMPLE__.
class TestRubyStatsampleOpt < Test::Unit::TestCase
  # The extension must flag itself as loaded.
  def test_base
    # assert's second argument is the failure message, not an expected value.
    assert_equal(true, Statsample::OPTIMIZED)
  end

  def test_frequencies
    assert(Statsample::STATSAMPLE__.respond_to?(:frequencies))
    v=[1,1,2].to_vector
    exp={1=>2,2=>1}
    assert_equal(exp,Statsample::STATSAMPLE__.frequencies(v.data))
    assert_equal(v._frequencies, v.frequencies)
  end

  def test_set_valid_data
    assert(Statsample::STATSAMPLE__.respond_to?(:set_valid_data_intern))
    v=[2,3,4].to_vector
    v.add(5)
    assert_equal([2,3,4,5], v.valid_data)
    v.add(nil,false)
    assert_equal([2,3,4,5], v.valid_data)
    assert_equal([2,3,4,5,nil], v.data)
  end

  def test_case_as_array
    assert(Statsample::STATSAMPLE__.respond_to?(:case_as_array))
    a=[1,2,3].to_vector(:scale)
    b=[4,5,6].to_vector(:scale)
    ds={"a"=>a,"b"=>b}.to_dataset
    assert_equal([1,4],Statsample::STATSAMPLE__.case_as_array(ds,0))
    assert_equal([2,5],Statsample::STATSAMPLE__.case_as_array(ds,1))
    assert_equal([3,6],Statsample::STATSAMPLE__.case_as_array(ds,2))
  end

  def test_case_as_hash
    assert(Statsample::STATSAMPLE__.respond_to?(:case_as_hash))
    a=[1,2,3].to_vector(:scale)
    b=[4,5,6].to_vector(:scale)
    ds={"a"=>a,"b"=>b}.to_dataset
    assert_equal({"a"=>1,"b"=>4},Statsample::STATSAMPLE__.case_as_hash(ds,0))
    # Cover every case, mirroring test_case_as_array.
    assert_equal({"a"=>2,"b"=>5},Statsample::STATSAMPLE__.case_as_hash(ds,1))
    assert_equal({"a"=>3,"b"=>6},Statsample::STATSAMPLE__.case_as_hash(ds,2))
  end
end
|
metadata
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: statsample-optimization
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 9
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 2
|
8
|
+
- 0
|
9
|
+
- 3
|
10
|
+
version: 2.0.3
|
11
|
+
platform: x86-linux
|
12
|
+
authors:
|
13
|
+
- Claudio Bustos
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-06-19 00:00:00 -04:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: statsample
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 47
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
- 12
|
33
|
+
- 0
|
34
|
+
version: 0.12.0
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: statistics2
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 103
|
46
|
+
segments:
|
47
|
+
- 0
|
48
|
+
- 54
|
49
|
+
version: "0.54"
|
50
|
+
type: :runtime
|
51
|
+
version_requirements: *id002
|
52
|
+
- !ruby/object:Gem::Dependency
|
53
|
+
name: gsl
|
54
|
+
prerelease: false
|
55
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ~>
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
hash: 253
|
61
|
+
segments:
|
62
|
+
- 1
|
63
|
+
- 12
|
64
|
+
- 109
|
65
|
+
version: 1.12.109
|
66
|
+
type: :runtime
|
67
|
+
version_requirements: *id003
|
68
|
+
- !ruby/object:Gem::Dependency
|
69
|
+
name: rake-compiler
|
70
|
+
prerelease: false
|
71
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
72
|
+
none: false
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
hash: 3
|
77
|
+
segments:
|
78
|
+
- 0
|
79
|
+
version: "0"
|
80
|
+
type: :runtime
|
81
|
+
version_requirements: *id004
|
82
|
+
- !ruby/object:Gem::Dependency
|
83
|
+
name: hoe
|
84
|
+
prerelease: false
|
85
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
86
|
+
none: false
|
87
|
+
requirements:
|
88
|
+
- - ">="
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
hash: 21
|
91
|
+
segments:
|
92
|
+
- 2
|
93
|
+
- 6
|
94
|
+
- 1
|
95
|
+
version: 2.6.1
|
96
|
+
type: :development
|
97
|
+
version_requirements: *id005
|
98
|
+
description: |-
|
99
|
+
Install gsl, statistics2 and provides a C extension to optimize the following methods
|
100
|
+
|
101
|
+
* Vector#frecuencies
|
102
|
+
* Vector#set_valid_data
|
103
|
+
* Dataset#case_as_hash
|
104
|
+
* Dataset#case_as_array
|
105
|
+
email:
|
106
|
+
- clbustos_at_gmail.com
|
107
|
+
executables: []
|
108
|
+
|
109
|
+
extensions: []
|
110
|
+
|
111
|
+
extra_rdoc_files:
|
112
|
+
- History.txt
|
113
|
+
- Manifest.txt
|
114
|
+
- README.txt
|
115
|
+
files:
|
116
|
+
- History.txt
|
117
|
+
- Manifest.txt
|
118
|
+
- README.txt
|
119
|
+
- Rakefile
|
120
|
+
- ext/statsamplert/extconf.rb
|
121
|
+
- ext/statsamplert/statsamplert.c
|
122
|
+
- test/test_statsample_optimization.rb
|
123
|
+
- lib/statsamplert.so
|
124
|
+
has_rdoc: true
|
125
|
+
homepage: http://rubyforge.org/projects/ruby-statsample/
|
126
|
+
licenses: []
|
127
|
+
|
128
|
+
post_install_message:
|
129
|
+
rdoc_options:
|
130
|
+
- --main
|
131
|
+
- README.txt
|
132
|
+
require_paths:
|
133
|
+
- lib
|
134
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
135
|
+
none: false
|
136
|
+
requirements:
|
137
|
+
- - ">="
|
138
|
+
- !ruby/object:Gem::Version
|
139
|
+
hash: 3
|
140
|
+
segments:
|
141
|
+
- 0
|
142
|
+
version: "0"
|
143
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
144
|
+
none: false
|
145
|
+
requirements:
|
146
|
+
- - ">="
|
147
|
+
- !ruby/object:Gem::Version
|
148
|
+
hash: 3
|
149
|
+
segments:
|
150
|
+
- 0
|
151
|
+
version: "0"
|
152
|
+
requirements: []
|
153
|
+
|
154
|
+
rubyforge_project: ruby-statsample
|
155
|
+
rubygems_version: 1.3.7
|
156
|
+
signing_key:
|
157
|
+
specification_version: 3
|
158
|
+
summary: Install gsl, statistics2 and provides a C extension to optimize the following methods * Vector#frecuencies * Vector#set_valid_data * Dataset#case_as_hash * Dataset#case_as_array
|
159
|
+
test_files:
|
160
|
+
- test/test_statsample_optimization.rb
|