knn_cv 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE +674 -0
- data/README.md +36 -0
- data/Rakefile +14 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/c_knn/Makefile +260 -0
- data/ext/c_knn/extconf.rb +5 -0
- data/ext/c_knn/knn.c +250 -0
- data/knn_cv.gemspec +34 -0
- data/lib/knn_cv/version.rb +3 -0
- data/lib/knn_cv.rb +7 -0
- metadata +116 -0
data/README.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# KnnCv
|
2
|
+
|
3
|
+
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/knn_cv`. To experiment with that code, run `bin/console` for an interactive prompt.
|
4
|
+
|
5
|
+
KnnCv is a native kNN leave-one-out cross-validation implementation for Ruby, based on the 'class' package for R.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'knn_cv'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install knn_cv
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
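Create a classifier and evaluate a feature subset:

    classifier = KnnCv::Classifier.new(k, dataset, rng)
    fitness = classifier.fitness_for(features)

`dataset` must respond to `instances`, `classes`, `numeric_attrs` and `class_count`; `rng` must respond to `rand`; `features` holds one 0/1 flag per attribute. `fitness_for` returns the fraction of instances classified correctly by leave-one-out kNN, or `-2.0` if too many distance ties occur.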
|
26
|
+
|
27
|
+
## Development
|
28
|
+
|
29
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
30
|
+
|
31
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
32
|
+
|
33
|
+
## Contributing
|
34
|
+
|
35
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/fdavidcl/ruby-knn_cv.
|
36
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require "bundler/gem_tasks"
require "rake/testtask"
require "rake/extensiontask"

# Runs every *_test.rb file under test/ with both test/ and lib/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs << "test"
  t.libs << "lib"
  t.test_files = FileList['test/**/*_test.rb']
end

# Builds the native extension in ext/c_knn. The gemspec must be this gem's own
# file (knn_cv.gemspec); loading a non-existent gemspec returns nil.
spec = Gem::Specification.load('knn_cv.gemspec')
Rake::ExtensionTask.new('c_knn', spec)

# The only test task defined above is :test, so the default must point at it
# (there is no :spec task in this Rakefile).
task :default => :test
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Development console: loads the bundle and the gem, then opens an IRB session.
require "bundler/setup"
require "knn_cv"

# Fixtures or other initialization code that makes experimenting with the gem
# easier can be added at this point.

# A different console may be used instead of IRB. For Pry, add pry to the
# Gemfile and enable the two lines below:
# require "pry"
# Pry.start

require "irb"
IRB.start
|
data/bin/setup
ADDED
data/ext/c_knn/Makefile
ADDED
@@ -0,0 +1,260 @@
|
|
1
|
+
|
2
|
+
SHELL = /bin/sh
|
3
|
+
|
4
|
+
# V=0 quiet, V=1 verbose. other values don't work.
|
5
|
+
V = 0
|
6
|
+
Q1 = $(V:1=)
|
7
|
+
Q = $(Q1:0=@)
|
8
|
+
ECHO1 = $(V:1=@:)
|
9
|
+
ECHO = $(ECHO1:0=@echo)
|
10
|
+
NULLCMD = :
|
11
|
+
|
12
|
+
#### Start of system configuration section. ####
|
13
|
+
|
14
|
+
srcdir = .
|
15
|
+
topdir = /home/fdavidcl/.rvm/rubies/ruby-2.3.0/include/ruby-2.3.0
|
16
|
+
hdrdir = $(topdir)
|
17
|
+
arch_hdrdir = /home/fdavidcl/.rvm/rubies/ruby-2.3.0/include/ruby-2.3.0/x86_64-linux
|
18
|
+
PATH_SEPARATOR = :
|
19
|
+
VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
|
20
|
+
prefix = $(DESTDIR)/home/fdavidcl/.rvm/rubies/ruby-2.3.0
|
21
|
+
rubysitearchprefix = $(rubylibprefix)/$(sitearch)
|
22
|
+
rubyarchprefix = $(rubylibprefix)/$(arch)
|
23
|
+
rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
|
24
|
+
exec_prefix = $(prefix)
|
25
|
+
vendorarchhdrdir = $(vendorhdrdir)/$(sitearch)
|
26
|
+
sitearchhdrdir = $(sitehdrdir)/$(sitearch)
|
27
|
+
rubyarchhdrdir = $(rubyhdrdir)/$(arch)
|
28
|
+
vendorhdrdir = $(rubyhdrdir)/vendor_ruby
|
29
|
+
sitehdrdir = $(rubyhdrdir)/site_ruby
|
30
|
+
rubyhdrdir = $(includedir)/$(RUBY_VERSION_NAME)
|
31
|
+
vendorarchdir = $(vendorlibdir)/$(sitearch)
|
32
|
+
vendorlibdir = $(vendordir)/$(ruby_version)
|
33
|
+
vendordir = $(rubylibprefix)/vendor_ruby
|
34
|
+
sitearchdir = $(sitelibdir)/$(sitearch)
|
35
|
+
sitelibdir = $(sitedir)/$(ruby_version)
|
36
|
+
sitedir = $(rubylibprefix)/site_ruby
|
37
|
+
rubyarchdir = $(rubylibdir)/$(arch)
|
38
|
+
rubylibdir = $(rubylibprefix)/$(ruby_version)
|
39
|
+
sitearchincludedir = $(includedir)/$(sitearch)
|
40
|
+
archincludedir = $(includedir)/$(arch)
|
41
|
+
sitearchlibdir = $(libdir)/$(sitearch)
|
42
|
+
archlibdir = $(libdir)/$(arch)
|
43
|
+
ridir = $(datarootdir)/$(RI_BASE_NAME)
|
44
|
+
mandir = $(datarootdir)/man
|
45
|
+
localedir = $(datarootdir)/locale
|
46
|
+
libdir = $(exec_prefix)/lib
|
47
|
+
psdir = $(docdir)
|
48
|
+
pdfdir = $(docdir)
|
49
|
+
dvidir = $(docdir)
|
50
|
+
htmldir = $(docdir)
|
51
|
+
infodir = $(datarootdir)/info
|
52
|
+
docdir = $(datarootdir)/doc/$(PACKAGE)
|
53
|
+
oldincludedir = $(DESTDIR)/usr/include
|
54
|
+
includedir = $(prefix)/include
|
55
|
+
localstatedir = $(prefix)/var
|
56
|
+
sharedstatedir = $(prefix)/com
|
57
|
+
sysconfdir = $(prefix)/etc
|
58
|
+
datadir = $(datarootdir)
|
59
|
+
datarootdir = $(prefix)/share
|
60
|
+
libexecdir = $(exec_prefix)/libexec
|
61
|
+
sbindir = $(exec_prefix)/sbin
|
62
|
+
bindir = $(exec_prefix)/bin
|
63
|
+
archdir = $(rubyarchdir)
|
64
|
+
|
65
|
+
|
66
|
+
CC = gcc
|
67
|
+
CXX = g++
|
68
|
+
LIBRUBY = $(LIBRUBY_SO)
|
69
|
+
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
|
70
|
+
LIBRUBYARG_SHARED = -Wl,-R$(libdir) -L$(libdir) -l$(RUBY_SO_NAME)
|
71
|
+
LIBRUBYARG_STATIC = -Wl,-R$(libdir) -L$(libdir) -l$(RUBY_SO_NAME)-static
|
72
|
+
empty =
|
73
|
+
OUTFLAG = -o $(empty)
|
74
|
+
COUTFLAG = -o $(empty)
|
75
|
+
|
76
|
+
RUBY_EXTCONF_H =
|
77
|
+
cflags = $(optflags) $(debugflags) $(warnflags)
|
78
|
+
cxxflags = $(optflags) $(debugflags) $(warnflags)
|
79
|
+
optflags = -O3 -fno-fast-math
|
80
|
+
debugflags = -ggdb3
|
81
|
+
warnflags = -Wall -Wextra -Wno-unused-parameter -Wno-parentheses -Wno-long-long -Wno-missing-field-initializers -Wunused-variable -Wpointer-arith -Wwrite-strings -Wdeclaration-after-statement -Wimplicit-function-declaration -Wdeprecated-declarations -Wno-packed-bitfield-compat -Wno-maybe-uninitialized
|
82
|
+
CCDLFLAGS = -fPIC
|
83
|
+
CFLAGS = $(CCDLFLAGS) $(cflags) -fPIC $(ARCH_FLAG)
|
84
|
+
INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
|
85
|
+
DEFS =
|
86
|
+
CPPFLAGS = $(DEFS) $(cppflags)
|
87
|
+
CXXFLAGS = $(CCDLFLAGS) $(cxxflags) $(ARCH_FLAG)
|
88
|
+
ldflags = -L. -fstack-protector -rdynamic -Wl,-export-dynamic
|
89
|
+
dldflags =
|
90
|
+
ARCH_FLAG =
|
91
|
+
DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
|
92
|
+
LDSHARED = $(CC) -shared
|
93
|
+
LDSHAREDXX = $(CXX) -shared
|
94
|
+
AR = ar
|
95
|
+
EXEEXT =
|
96
|
+
|
97
|
+
RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)
|
98
|
+
RUBY_SO_NAME = ruby
|
99
|
+
RUBYW_INSTALL_NAME =
|
100
|
+
RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version)
|
101
|
+
RUBYW_BASE_NAME = rubyw
|
102
|
+
RUBY_BASE_NAME = ruby
|
103
|
+
|
104
|
+
arch = x86_64-linux
|
105
|
+
sitearch = $(arch)
|
106
|
+
ruby_version = 2.3.0
|
107
|
+
ruby = $(bindir)/$(RUBY_BASE_NAME)
|
108
|
+
RUBY = $(ruby)
|
109
|
+
ruby_headers = $(hdrdir)/ruby.h $(hdrdir)/ruby/ruby.h $(hdrdir)/ruby/defines.h $(hdrdir)/ruby/missing.h $(hdrdir)/ruby/intern.h $(hdrdir)/ruby/st.h $(hdrdir)/ruby/subst.h $(arch_hdrdir)/ruby/config.h
|
110
|
+
|
111
|
+
RM = rm -f
|
112
|
+
RM_RF = $(RUBY) -run -e rm -- -rf
|
113
|
+
RMDIRS = rmdir --ignore-fail-on-non-empty -p
|
114
|
+
MAKEDIRS = /usr/bin/mkdir -p
|
115
|
+
INSTALL = /usr/bin/install -c
|
116
|
+
INSTALL_PROG = $(INSTALL) -m 0755
|
117
|
+
INSTALL_DATA = $(INSTALL) -m 644
|
118
|
+
COPY = cp
|
119
|
+
TOUCH = exit >
|
120
|
+
|
121
|
+
#### End of system configuration section. ####
|
122
|
+
|
123
|
+
preload =
|
124
|
+
|
125
|
+
libpath = . $(libdir)
|
126
|
+
LIBPATH = -L. -L$(libdir) -Wl,-R$(libdir)
|
127
|
+
DEFFILE =
|
128
|
+
|
129
|
+
CLEANFILES = mkmf.log
|
130
|
+
DISTCLEANFILES =
|
131
|
+
DISTCLEANDIRS =
|
132
|
+
|
133
|
+
extout =
|
134
|
+
extout_prefix =
|
135
|
+
target_prefix =
|
136
|
+
LOCAL_LIBS =
|
137
|
+
LIBS = $(LIBRUBYARG_SHARED) -lpthread -lgmp -ldl -lcrypt -lm -lc
|
138
|
+
ORIG_SRCS = knn.c
|
139
|
+
SRCS = $(ORIG_SRCS)
|
140
|
+
OBJS = knn.o
|
141
|
+
HDRS =
|
142
|
+
TARGET = c_knn
|
143
|
+
TARGET_NAME = c_knn
|
144
|
+
TARGET_ENTRY = Init_$(TARGET_NAME)
|
145
|
+
DLLIB = $(TARGET).so
|
146
|
+
EXTSTATIC =
|
147
|
+
STATIC_LIB =
|
148
|
+
|
149
|
+
TIMESTAMP_DIR = .
|
150
|
+
BINDIR = $(bindir)
|
151
|
+
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
152
|
+
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
|
153
|
+
RUBYARCHDIR = $(sitearchdir)$(target_prefix)
|
154
|
+
HDRDIR = $(rubyhdrdir)/ruby$(target_prefix)
|
155
|
+
ARCHHDRDIR = $(rubyhdrdir)/$(arch)/ruby$(target_prefix)
|
156
|
+
|
157
|
+
TARGET_SO = $(DLLIB)
|
158
|
+
CLEANLIBS = $(TARGET).so
|
159
|
+
CLEANOBJS = *.o *.bak
|
160
|
+
|
161
|
+
all: $(DLLIB)
|
162
|
+
static: $(STATIC_LIB) install-rb
|
163
|
+
.PHONY: all install static install-so install-rb
|
164
|
+
.PHONY: clean clean-so clean-static clean-rb
|
165
|
+
|
166
|
+
clean-static::
|
167
|
+
clean-rb-default::
|
168
|
+
clean-rb::
|
169
|
+
clean-so::
|
170
|
+
clean: clean-so clean-static clean-rb-default clean-rb
|
171
|
+
-$(Q)$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) .*.time
|
172
|
+
|
173
|
+
distclean-rb-default::
|
174
|
+
distclean-rb::
|
175
|
+
distclean-so::
|
176
|
+
distclean-static::
|
177
|
+
distclean: clean distclean-so distclean-static distclean-rb-default distclean-rb
|
178
|
+
-$(Q)$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
179
|
+
-$(Q)$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
|
180
|
+
-$(Q)$(RMDIRS) $(DISTCLEANDIRS) 2> /dev/null || true
|
181
|
+
|
182
|
+
realclean: distclean
|
183
|
+
install: install-so install-rb
|
184
|
+
|
185
|
+
install-so: $(DLLIB) $(TIMESTAMP_DIR)/.RUBYARCHDIR.time
|
186
|
+
$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
|
187
|
+
clean-static::
|
188
|
+
-$(Q)$(RM) $(STATIC_LIB)
|
189
|
+
install-rb: pre-install-rb install-rb-default
|
190
|
+
install-rb-default: pre-install-rb-default
|
191
|
+
pre-install-rb: Makefile
|
192
|
+
pre-install-rb-default: Makefile
|
193
|
+
pre-install-rb-default:
|
194
|
+
@$(NULLCMD)
|
195
|
+
$(TIMESTAMP_DIR)/.RUBYARCHDIR.time:
|
196
|
+
$(Q) $(MAKEDIRS) $(@D) $(RUBYARCHDIR)
|
197
|
+
$(Q) $(TOUCH) $@
|
198
|
+
|
199
|
+
site-install: site-install-so site-install-rb
|
200
|
+
site-install-so: install-so
|
201
|
+
site-install-rb: install-rb
|
202
|
+
|
203
|
+
.SUFFIXES: .c .m .cc .mm .cxx .cpp .o .S
|
204
|
+
|
205
|
+
.cc.o:
|
206
|
+
$(ECHO) compiling $(<)
|
207
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
208
|
+
|
209
|
+
.cc.S:
|
210
|
+
$(ECHO) translating $(<)
|
211
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $<
|
212
|
+
|
213
|
+
.mm.o:
|
214
|
+
$(ECHO) compiling $(<)
|
215
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
216
|
+
|
217
|
+
.mm.S:
|
218
|
+
$(ECHO) translating $(<)
|
219
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $<
|
220
|
+
|
221
|
+
.cxx.o:
|
222
|
+
$(ECHO) compiling $(<)
|
223
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
224
|
+
|
225
|
+
.cxx.S:
|
226
|
+
$(ECHO) translating $(<)
|
227
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $<
|
228
|
+
|
229
|
+
.cpp.o:
|
230
|
+
$(ECHO) compiling $(<)
|
231
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
232
|
+
|
233
|
+
.cpp.S:
|
234
|
+
$(ECHO) translating $(<)
|
235
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $<
|
236
|
+
|
237
|
+
.c.o:
|
238
|
+
$(ECHO) compiling $(<)
|
239
|
+
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $<
|
240
|
+
|
241
|
+
.c.S:
|
242
|
+
$(ECHO) translating $(<)
|
243
|
+
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $<
|
244
|
+
|
245
|
+
.m.o:
|
246
|
+
$(ECHO) compiling $(<)
|
247
|
+
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $<
|
248
|
+
|
249
|
+
.m.S:
|
250
|
+
$(ECHO) translating $(<)
|
251
|
+
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $<
|
252
|
+
|
253
|
+
$(DLLIB): $(OBJS) Makefile
|
254
|
+
$(ECHO) linking shared-object $(DLLIB)
|
255
|
+
-$(Q)$(RM) $(@)
|
256
|
+
$(Q) $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
|
257
|
+
|
258
|
+
|
259
|
+
|
260
|
+
$(OBJS): $(HDRS) $(ruby_headers)
|
data/ext/c_knn/knn.c
ADDED
@@ -0,0 +1,250 @@
|
|
1
|
+
/*
|
2
|
+
Native implementation of kNN leave-one-out cross validation for Ruby
|
3
|
+
David Charte (C) 2016
|
4
|
+
|
5
|
+
This program is free software: you can redistribute it and/or modify
|
6
|
+
it under the terms of the GNU General Public License as published by
|
7
|
+
the Free Software Foundation, either version 3 of the License, or
|
8
|
+
(at your option) any later version.
|
9
|
+
|
10
|
+
This program is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
GNU General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU General Public License
|
16
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
|
18
|
+
Based on the following work:
|
19
|
+
* class/src/class.c by W. N. Venables and B. D. Ripley Copyright (C) 1994-2002 (GPLv2)
|
20
|
+
*/
|
21
|
+
|
22
|
+
#include <ruby.h>
|
23
|
+
#include <math.h>
|
24
|
+
#include <float.h>
|
25
|
+
|
26
|
+
#define EPS 1e-4 /* relative test of equality of distances */
|
27
|
+
#define MAX_TIES 1000
|
28
|
+
/* Not worth doing this dynamically -- limits k + # ties + fence, in fact */
|
29
|
+
|
30
|
+
/* Definitions for Ruby */
|
31
|
+
VALUE KnnCv = Qnil;
|
32
|
+
VALUE Classifier = Qnil;
|
33
|
+
|
34
|
+
static VALUE C_instances;
|
35
|
+
static VALUE C_classes;
|
36
|
+
static VALUE C_numerics;
|
37
|
+
|
38
|
+
static double FLOAT_MAX;
|
39
|
+
|
40
|
+
/*
 * Free callback registered with Data_Wrap_Struct for the malloc'ed buffers
 * held by a classifier. It is used for both double and int buffers, so it
 * takes a generic pointer (matching the void (*)(void *) shape of
 * RUBY_DATA_FUNC) instead of int *.
 */
void c_knn_free(void *data) {
  free(data);
}
|
43
|
+
|
44
|
+
/*
 * KnnCv::Classifier#fitness_for(features)
 *
 * Runs leave-one-out cross validation of the kNN classifier over the stored
 * dataset, using only the attributes whose flag in `features` is non-zero.
 *
 * features: object responding to to_a; entry k is converted with NUM2INT and
 *           attribute k takes part in the distance when the result is non-zero.
 *
 * Returns a Float: the fraction of instances whose predicted class equals
 * their stored class, or -2.0 when the number of tied neighbours exceeds
 * what MAX_TIES allows.
 *
 * Raises ArgumentError when @num_neighbors, @nclass, @nrow or @ncol hold
 * values the algorithm cannot work with (these previously caused
 * out-of-bounds accesses on the fixed-size neighbour arrays).
 *
 * The neighbour search and voting are based on VR_knn from the "class"
 * package for R (use_all behaviour: every neighbour tied with the k-th one
 * also votes).
 */
VALUE method_c_knn_leaveoneout(VALUE self, VALUE rb_features) {
  double *instances = NULL;
  int *classes = NULL;
  int *which_numeric = NULL;
  int *selected = NULL;
  int *votes = NULL;
  const double *own_row, *other_row;
  int nrow, ncol, class_count, kinit;
  int i, index, j, k, k1, kn, mm, npat, ntie;
  int correct_guesses = 0;
  int pos[MAX_TIES];
  double dist, tmp, nndist[MAX_TIES];
  VALUE selected_buf = Qnil, votes_buf = Qnil;
  VALUE rng;
  ID id_rand;

  Data_Get_Struct(rb_iv_get(self, "@instances"), double, instances);
  Data_Get_Struct(rb_iv_get(self, "@classes"), int, classes);
  Data_Get_Struct(rb_iv_get(self, "@which_numeric"), int, which_numeric);

  nrow = NUM2INT(rb_iv_get(self, "@nrow"));
  ncol = NUM2INT(rb_iv_get(self, "@ncol"));
  kinit = NUM2INT(rb_iv_get(self, "@num_neighbors"));
  class_count = NUM2INT(rb_iv_get(self, "@nclass"));

  /* nndist/pos hold k neighbours + ties + one fence slot, all within MAX_TIES */
  if (kinit < 1 || kinit >= MAX_TIES - 1)
    rb_raise(rb_eArgError, "number of neighbors must be between 1 and %d", MAX_TIES - 2);
  if (class_count < 1)
    rb_raise(rb_eArgError, "at least one class is required");
  if (ncol < 0)
    rb_raise(rb_eArgError, "invalid number of attributes");
  /* Every instance needs k *other* instances, otherwise pos[] is never filled */
  if (nrow <= kinit)
    rb_raise(rb_eArgError, "the dataset needs more instances than neighbors");

  rb_features = rb_funcall(rb_features, rb_intern("to_a"), 0);
  rng = rb_iv_get(self, "@rng");
  id_rand = rb_intern("rand");

  /*
   * Scratch buffers come from ALLOCV_N so they are reclaimed even when a Ruby
   * call below (NUM2INT, the rng) raises; plain malloc would leak there.
   */
  selected = ALLOCV_N(int, selected_buf, ncol);
  votes = ALLOCV_N(int, votes_buf, class_count);

  /* Convert the feature mask once instead of once per distance term */
  for (k = 0; k < ncol; k++)
    selected[k] = NUM2INT(rb_ary_entry(rb_features, k)) != 0;

  /*
    Use a 'fence' in the (k+1)st position to avoid special cases.
    Simple insertion sort will suffice since k will be small.
  */
  for (npat = 0; npat < nrow; npat++) {
    kn = kinit;
    own_row = instances + (size_t)npat * ncol;

    for (k = 0; k < kn; k++)
      nndist[k] = 0.99 * DBL_MAX;

    for (j = 0; j < nrow; j++) {
      if (j == npat) /* Skip own instance for leave-one-out cross validation */
        continue;

      other_row = instances + (size_t)j * ncol;
      dist = 0.0;

      for (k = 0; k < ncol; k++) {
        if (!selected[k]) /* Skip unselected features */
          continue;

        tmp = own_row[k] - other_row[k];

        if (which_numeric[k]) {
          /* Numeric attribute: squared difference */
          dist += tmp * tmp;
        } else if (tmp >= EPS || tmp <= -EPS) {
          /* Nominal attribute: add 1 only when the two values differ */
          dist += 1;
        }
      }

      /* Use 'fuzz' since distance computed could depend on order of coordinates */
      if (dist <= nndist[kinit - 1] * (1 + EPS))
        for (k = 0; k <= kn; k++)
          if (dist < nndist[k]) {
            for (k1 = kn; k1 > k; k1--) {
              nndist[k1] = nndist[k1 - 1];
              pos[k1] = pos[k1 - 1];
            }
            nndist[k] = dist;
            pos[k] = j;

            /* Keep an extra distance if the largest current one ties with current kth */
            if (nndist[kn] <= nndist[kinit - 1])
              if (++kn == MAX_TIES - 1) {
                /* Too many ties. Fail, releasing the scratch buffers first */
                ALLOCV_END(votes_buf);
                ALLOCV_END(selected_buf);
                return rb_float_new(-2.0);
              }
            break;
          }

      nndist[kn] = 0.99 * DBL_MAX;
    }

    for (j = 0; j < class_count; j++)
      votes[j] = 0;

    /* The k nearest neighbours always vote... */
    for (j = 0; j < kinit; j++)
      votes[classes[pos[j]]]++;

    /* ...and so does every extra neighbour tied with the k-th distance */
    for (j = kinit; j < kn; j++) {
      if (nndist[j] > nndist[kinit - 1] * (1 + EPS))
        break;

      votes[classes[pos[j]]]++;
    }

    /* Use reservoir sampling to choose amongst the tied votes */
    ntie = 1;
    mm = votes[0];
    index = 0;

    for (i = 1; i < class_count; i++)
      if (votes[i] > mm) {
        ntie = 1;
        index = i;
        mm = votes[i];
      } else if (votes[i] == mm) {
        if (++ntie * NUM2DBL(rb_funcall(rng, id_rand, 0)) < 1.0)
          index = i;
      }

    /* Count correct guesses as we go; no per-instance result array is needed */
    correct_guesses += (index == classes[npat]);
  }

  ALLOCV_END(votes_buf);
  ALLOCV_END(selected_buf);

  return rb_float_new((double)correct_guesses / (double)nrow);
}
|
186
|
+
|
187
|
+
/*
 * Allocates a zero-filled buffer of `count` elements of `size` bytes, owned by
 * a new wrapper object of class `klass` that is stored in instance variable
 * `ivar` of `self`. The wrapper is created and stored before the buffer is
 * filled, so the memory is released by the GC (through c_knn_free) even when a
 * later conversion raises.
 */
static void *c_knn_alloc_wrapped(VALUE self, const char *ivar, VALUE klass, size_t count, size_t size) {
  VALUE wrapper = Data_Wrap_Struct(klass, NULL, c_knn_free, NULL);
  void *buffer;

  rb_iv_set(self, ivar, wrapper);

  /* Always request at least one element so NULL unambiguously means failure */
  buffer = calloc(count ? count : 1, size);
  if (buffer == NULL)
    rb_memerror();

  DATA_PTR(wrapper) = buffer;
  return buffer;
}

/*
 * KnnCv::Classifier#initialize(k, dataset, random)
 *
 * Copies the dataset into native buffers kept in instance variables.
 *
 * rb_k:          number of neighbours, stored as @num_neighbors.
 * rb_dataset:    object responding to instances (Array of Arrays of numbers),
 *                classes (Array of integers, one per instance), numeric_attrs
 *                (Array of integers, one per attribute; non-zero marks the
 *                attribute as numeric) and class_count.
 * rb_random_par: object stored as @rng; fitness_for calls rand on it.
 *
 * Raises StandardError when a String is found among the instance values, and
 * TypeError when the dataset parts are not Arrays or hold non-numeric values.
 * Returns self.
 */
VALUE method_c_knn_initialize(VALUE self, VALUE rb_k, VALUE rb_dataset, VALUE rb_random_par) {
  int ncol, nrow, i, j;
  double *instances = NULL;
  int *classes = NULL;
  int *which_numeric = NULL;
  VALUE data, rb_class, rb_numeric, row, cell;

  data = rb_funcall(rb_dataset, rb_intern("instances"), 0);
  rb_class = rb_funcall(rb_dataset, rb_intern("classes"), 0);
  rb_numeric = rb_funcall(rb_dataset, rb_intern("numeric_attrs"), 0);

  /* The array macros below are only safe on real Arrays */
  Check_Type(data, T_ARRAY);
  Check_Type(rb_class, T_ARRAY);
  Check_Type(rb_numeric, T_ARRAY);

  nrow = RARRAY_LENINT(data);
  ncol = 0;
  if (nrow > 0) { /* An empty dataset has no first row to measure */
    row = rb_ary_entry(data, 0);
    Check_Type(row, T_ARRAY);
    ncol = RARRAY_LENINT(row);
  }

  /* Scalar state read back by fitness_for */
  rb_iv_set(self, "@num_neighbors", rb_k);
  rb_iv_set(self, "@nrow", INT2NUM(nrow));
  rb_iv_set(self, "@ncol", INT2NUM(ncol));
  rb_iv_set(self, "@nclass", rb_funcall(rb_dataset, rb_intern("class_count"), 0));
  /* rb_intern yields an ID, not a Float object; DBL_MAX is the value wanted */
  FLOAT_MAX = DBL_MAX;
  rb_iv_set(self, "@rng", rb_random_par);

  /* Row-major copy of the instance values */
  instances = c_knn_alloc_wrapped(self, "@instances", C_instances,
                                  (size_t)nrow * (size_t)ncol, sizeof(double));

  for (i = 0; i < nrow; i++) {
    row = rb_ary_entry(data, i);
    Check_Type(row, T_ARRAY);

    for (j = 0; j < ncol; j++) {
      cell = rb_ary_entry(row, j);

      if (TYPE(cell) == T_STRING)
        rb_raise(rb_eStandardError, "A string was found within the dataset. Aborting...");

      instances[(size_t)i * ncol + j] = NUM2DBL(cell);
    }
  }

  /* Class index of every instance */
  classes = c_knn_alloc_wrapped(self, "@classes", C_classes, (size_t)nrow, sizeof(int));

  for (i = 0; i < nrow; i++)
    classes[i] = NUM2INT(rb_ary_entry(rb_class, i));

  /* Per-attribute flag: non-zero means numeric, zero means nominal */
  which_numeric = c_knn_alloc_wrapped(self, "@which_numeric", C_numerics, (size_t)ncol, sizeof(int));

  for (j = 0; j < ncol; j++)
    which_numeric[j] = NUM2INT(rb_ary_entry(rb_numeric, j));

  return self;
}
|
238
|
+
|
239
|
+
void Init_c_knn(void) {
|
240
|
+
KnnCv = rb_const_get(rb_cObject, rb_intern("KnnCv"));
|
241
|
+
Classifier = rb_define_class_under(KnnCv, "Classifier", rb_cObject);
|
242
|
+
|
243
|
+
/* Wrapper classes */
|
244
|
+
C_instances = rb_define_class_under(Classifier, "Instances", rb_cObject);
|
245
|
+
C_classes = rb_define_class_under(Classifier, "Classes", rb_cObject);
|
246
|
+
C_numerics = rb_define_class_under(Classifier, "Numerics", rb_cObject);
|
247
|
+
|
248
|
+
rb_define_method(Classifier, "initialize", method_c_knn_initialize, 3);
|
249
|
+
rb_define_method(Classifier, "fitness_for", method_c_knn_leaveoneout, 1);
|
250
|
+
}
|
data/knn_cv.gemspec
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# coding: utf-8
# Make lib/ loadable so the version constant can be read while building the gem.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'knn_cv/version'

Gem::Specification.new do |spec|
  spec.name          = "knn_cv"
  spec.version       = KnnCv::VERSION
  spec.authors       = ["David Charte"]
  spec.email         = ["fdavidcl@outlook.com"]

  spec.summary       = %q{A native kNN leave-one-out technique implementation for Ruby based on the 'class' package for R}
  spec.description   = %q{A native kNN leave-one-out technique implementation for Ruby based on the 'class' package for R}
  spec.homepage      = "https://github.com/fdavidcl/ruby-knn_cv"
  # The bundled LICENSE and the header of ext/c_knn/knn.c place the code under
  # the GNU GPL, version 3 or later; declare it so the gem metadata carries it.
  spec.license       = "GPL-3.0"

  # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
  # delete this section to allow pushing this gem to any host.
  # if spec.respond_to?(:metadata)
  #   spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
  # else
  #   raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  # end

  # Ship every file tracked by git except tests/specs/features.
  spec.files         = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]
  # Native extension compiled at install time.
  spec.extensions << "ext/c_knn/extconf.rb"

  spec.add_development_dependency "bundler", "~> 1.11"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "minitest", "~> 5.0"
  spec.add_development_dependency "rake-compiler"
end
|