knn_cv 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/.travis.yml +4 -0
- data/Gemfile +4 -0
- data/LICENSE +674 -0
- data/README.md +36 -0
- data/Rakefile +14 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/c_knn/Makefile +260 -0
- data/ext/c_knn/extconf.rb +5 -0
- data/ext/c_knn/knn.c +250 -0
- data/knn_cv.gemspec +34 -0
- data/lib/knn_cv/version.rb +3 -0
- data/lib/knn_cv.rb +7 -0
- metadata +116 -0
data/README.md
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# KnnCv
|
2
|
+
|
3
|
+
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/knn_cv`. To experiment with that code, run `bin/console` for an interactive prompt.
|
4
|
+
|
5
|
+
TODO: Delete this and the text above, and describe your gem
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'knn_cv'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install knn_cv
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
TODO: Write usage instructions here
|
26
|
+
|
27
|
+
## Development
|
28
|
+
|
29
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
30
|
+
|
31
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
32
|
+
|
33
|
+
## Contributing
|
34
|
+
|
35
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/fdavidcl/ruby-knn_cv.
|
36
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "rake/testtask"
|
3
|
+
|
4
|
+
Rake::TestTask.new(:test) do |t|
|
5
|
+
t.libs << "test"
|
6
|
+
t.libs << "lib"
|
7
|
+
t.test_files = FileList['test/**/*_test.rb']
|
8
|
+
end
|
9
|
+
|
10
|
+
task :default => :test # the only task defined above is :test; :spec does not exist
|
11
|
+
|
12
|
+
require 'rake/extensiontask'
|
13
|
+
spec = Gem::Specification.load('knn_cv.gemspec') # must name this gem's own gemspec, otherwise load returns nil
|
14
|
+
Rake::ExtensionTask.new('c_knn', spec)
|
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "knn_cv"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/bin/setup
ADDED
data/ext/c_knn/Makefile
ADDED
@@ -0,0 +1,260 @@
|
|
1
|
+
|
2
|
+
SHELL = /bin/sh
|
3
|
+
|
4
|
+
# V=0 quiet, V=1 verbose. other values don't work.
|
5
|
+
V = 0
|
6
|
+
Q1 = $(V:1=)
|
7
|
+
Q = $(Q1:0=@)
|
8
|
+
ECHO1 = $(V:1=@:)
|
9
|
+
ECHO = $(ECHO1:0=@echo)
|
10
|
+
NULLCMD = :
|
11
|
+
|
12
|
+
#### Start of system configuration section. ####
|
13
|
+
|
14
|
+
srcdir = .
|
15
|
+
topdir = /home/fdavidcl/.rvm/rubies/ruby-2.3.0/include/ruby-2.3.0
|
16
|
+
hdrdir = $(topdir)
|
17
|
+
arch_hdrdir = /home/fdavidcl/.rvm/rubies/ruby-2.3.0/include/ruby-2.3.0/x86_64-linux
|
18
|
+
PATH_SEPARATOR = :
|
19
|
+
VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
|
20
|
+
prefix = $(DESTDIR)/home/fdavidcl/.rvm/rubies/ruby-2.3.0
|
21
|
+
rubysitearchprefix = $(rubylibprefix)/$(sitearch)
|
22
|
+
rubyarchprefix = $(rubylibprefix)/$(arch)
|
23
|
+
rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
|
24
|
+
exec_prefix = $(prefix)
|
25
|
+
vendorarchhdrdir = $(vendorhdrdir)/$(sitearch)
|
26
|
+
sitearchhdrdir = $(sitehdrdir)/$(sitearch)
|
27
|
+
rubyarchhdrdir = $(rubyhdrdir)/$(arch)
|
28
|
+
vendorhdrdir = $(rubyhdrdir)/vendor_ruby
|
29
|
+
sitehdrdir = $(rubyhdrdir)/site_ruby
|
30
|
+
rubyhdrdir = $(includedir)/$(RUBY_VERSION_NAME)
|
31
|
+
vendorarchdir = $(vendorlibdir)/$(sitearch)
|
32
|
+
vendorlibdir = $(vendordir)/$(ruby_version)
|
33
|
+
vendordir = $(rubylibprefix)/vendor_ruby
|
34
|
+
sitearchdir = $(sitelibdir)/$(sitearch)
|
35
|
+
sitelibdir = $(sitedir)/$(ruby_version)
|
36
|
+
sitedir = $(rubylibprefix)/site_ruby
|
37
|
+
rubyarchdir = $(rubylibdir)/$(arch)
|
38
|
+
rubylibdir = $(rubylibprefix)/$(ruby_version)
|
39
|
+
sitearchincludedir = $(includedir)/$(sitearch)
|
40
|
+
archincludedir = $(includedir)/$(arch)
|
41
|
+
sitearchlibdir = $(libdir)/$(sitearch)
|
42
|
+
archlibdir = $(libdir)/$(arch)
|
43
|
+
ridir = $(datarootdir)/$(RI_BASE_NAME)
|
44
|
+
mandir = $(datarootdir)/man
|
45
|
+
localedir = $(datarootdir)/locale
|
46
|
+
libdir = $(exec_prefix)/lib
|
47
|
+
psdir = $(docdir)
|
48
|
+
pdfdir = $(docdir)
|
49
|
+
dvidir = $(docdir)
|
50
|
+
htmldir = $(docdir)
|
51
|
+
infodir = $(datarootdir)/info
|
52
|
+
docdir = $(datarootdir)/doc/$(PACKAGE)
|
53
|
+
oldincludedir = $(DESTDIR)/usr/include
|
54
|
+
includedir = $(prefix)/include
|
55
|
+
localstatedir = $(prefix)/var
|
56
|
+
sharedstatedir = $(prefix)/com
|
57
|
+
sysconfdir = $(prefix)/etc
|
58
|
+
datadir = $(datarootdir)
|
59
|
+
datarootdir = $(prefix)/share
|
60
|
+
libexecdir = $(exec_prefix)/libexec
|
61
|
+
sbindir = $(exec_prefix)/sbin
|
62
|
+
bindir = $(exec_prefix)/bin
|
63
|
+
archdir = $(rubyarchdir)
|
64
|
+
|
65
|
+
|
66
|
+
CC = gcc
|
67
|
+
CXX = g++
|
68
|
+
LIBRUBY = $(LIBRUBY_SO)
|
69
|
+
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
|
70
|
+
LIBRUBYARG_SHARED = -Wl,-R$(libdir) -L$(libdir) -l$(RUBY_SO_NAME)
|
71
|
+
LIBRUBYARG_STATIC = -Wl,-R$(libdir) -L$(libdir) -l$(RUBY_SO_NAME)-static
|
72
|
+
empty =
|
73
|
+
OUTFLAG = -o $(empty)
|
74
|
+
COUTFLAG = -o $(empty)
|
75
|
+
|
76
|
+
RUBY_EXTCONF_H =
|
77
|
+
cflags = $(optflags) $(debugflags) $(warnflags)
|
78
|
+
cxxflags = $(optflags) $(debugflags) $(warnflags)
|
79
|
+
optflags = -O3 -fno-fast-math
|
80
|
+
debugflags = -ggdb3
|
81
|
+
warnflags = -Wall -Wextra -Wno-unused-parameter -Wno-parentheses -Wno-long-long -Wno-missing-field-initializers -Wunused-variable -Wpointer-arith -Wwrite-strings -Wdeclaration-after-statement -Wimplicit-function-declaration -Wdeprecated-declarations -Wno-packed-bitfield-compat -Wno-maybe-uninitialized
|
82
|
+
CCDLFLAGS = -fPIC
|
83
|
+
CFLAGS = $(CCDLFLAGS) $(cflags) -fPIC $(ARCH_FLAG)
|
84
|
+
INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir)
|
85
|
+
DEFS =
|
86
|
+
CPPFLAGS = $(DEFS) $(cppflags)
|
87
|
+
CXXFLAGS = $(CCDLFLAGS) $(cxxflags) $(ARCH_FLAG)
|
88
|
+
ldflags = -L. -fstack-protector -rdynamic -Wl,-export-dynamic
|
89
|
+
dldflags =
|
90
|
+
ARCH_FLAG =
|
91
|
+
DLDFLAGS = $(ldflags) $(dldflags) $(ARCH_FLAG)
|
92
|
+
LDSHARED = $(CC) -shared
|
93
|
+
LDSHAREDXX = $(CXX) -shared
|
94
|
+
AR = ar
|
95
|
+
EXEEXT =
|
96
|
+
|
97
|
+
RUBY_INSTALL_NAME = $(RUBY_BASE_NAME)
|
98
|
+
RUBY_SO_NAME = ruby
|
99
|
+
RUBYW_INSTALL_NAME =
|
100
|
+
RUBY_VERSION_NAME = $(RUBY_BASE_NAME)-$(ruby_version)
|
101
|
+
RUBYW_BASE_NAME = rubyw
|
102
|
+
RUBY_BASE_NAME = ruby
|
103
|
+
|
104
|
+
arch = x86_64-linux
|
105
|
+
sitearch = $(arch)
|
106
|
+
ruby_version = 2.3.0
|
107
|
+
ruby = $(bindir)/$(RUBY_BASE_NAME)
|
108
|
+
RUBY = $(ruby)
|
109
|
+
ruby_headers = $(hdrdir)/ruby.h $(hdrdir)/ruby/ruby.h $(hdrdir)/ruby/defines.h $(hdrdir)/ruby/missing.h $(hdrdir)/ruby/intern.h $(hdrdir)/ruby/st.h $(hdrdir)/ruby/subst.h $(arch_hdrdir)/ruby/config.h
|
110
|
+
|
111
|
+
RM = rm -f
|
112
|
+
RM_RF = $(RUBY) -run -e rm -- -rf
|
113
|
+
RMDIRS = rmdir --ignore-fail-on-non-empty -p
|
114
|
+
MAKEDIRS = /usr/bin/mkdir -p
|
115
|
+
INSTALL = /usr/bin/install -c
|
116
|
+
INSTALL_PROG = $(INSTALL) -m 0755
|
117
|
+
INSTALL_DATA = $(INSTALL) -m 644
|
118
|
+
COPY = cp
|
119
|
+
TOUCH = exit >
|
120
|
+
|
121
|
+
#### End of system configuration section. ####
|
122
|
+
|
123
|
+
preload =
|
124
|
+
|
125
|
+
libpath = . $(libdir)
|
126
|
+
LIBPATH = -L. -L$(libdir) -Wl,-R$(libdir)
|
127
|
+
DEFFILE =
|
128
|
+
|
129
|
+
CLEANFILES = mkmf.log
|
130
|
+
DISTCLEANFILES =
|
131
|
+
DISTCLEANDIRS =
|
132
|
+
|
133
|
+
extout =
|
134
|
+
extout_prefix =
|
135
|
+
target_prefix =
|
136
|
+
LOCAL_LIBS =
|
137
|
+
LIBS = $(LIBRUBYARG_SHARED) -lpthread -lgmp -ldl -lcrypt -lm -lc
|
138
|
+
ORIG_SRCS = knn.c
|
139
|
+
SRCS = $(ORIG_SRCS)
|
140
|
+
OBJS = knn.o
|
141
|
+
HDRS =
|
142
|
+
TARGET = c_knn
|
143
|
+
TARGET_NAME = c_knn
|
144
|
+
TARGET_ENTRY = Init_$(TARGET_NAME)
|
145
|
+
DLLIB = $(TARGET).so
|
146
|
+
EXTSTATIC =
|
147
|
+
STATIC_LIB =
|
148
|
+
|
149
|
+
TIMESTAMP_DIR = .
|
150
|
+
BINDIR = $(bindir)
|
151
|
+
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
152
|
+
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
|
153
|
+
RUBYARCHDIR = $(sitearchdir)$(target_prefix)
|
154
|
+
HDRDIR = $(rubyhdrdir)/ruby$(target_prefix)
|
155
|
+
ARCHHDRDIR = $(rubyhdrdir)/$(arch)/ruby$(target_prefix)
|
156
|
+
|
157
|
+
TARGET_SO = $(DLLIB)
|
158
|
+
CLEANLIBS = $(TARGET).so
|
159
|
+
CLEANOBJS = *.o *.bak
|
160
|
+
|
161
|
+
all: $(DLLIB)
|
162
|
+
static: $(STATIC_LIB) install-rb
|
163
|
+
.PHONY: all install static install-so install-rb
|
164
|
+
.PHONY: clean clean-so clean-static clean-rb
|
165
|
+
|
166
|
+
clean-static::
|
167
|
+
clean-rb-default::
|
168
|
+
clean-rb::
|
169
|
+
clean-so::
|
170
|
+
clean: clean-so clean-static clean-rb-default clean-rb
|
171
|
+
-$(Q)$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES) .*.time
|
172
|
+
|
173
|
+
distclean-rb-default::
|
174
|
+
distclean-rb::
|
175
|
+
distclean-so::
|
176
|
+
distclean-static::
|
177
|
+
distclean: clean distclean-so distclean-static distclean-rb-default distclean-rb
|
178
|
+
-$(Q)$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
179
|
+
-$(Q)$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
|
180
|
+
-$(Q)$(RMDIRS) $(DISTCLEANDIRS) 2> /dev/null || true
|
181
|
+
|
182
|
+
realclean: distclean
|
183
|
+
install: install-so install-rb
|
184
|
+
|
185
|
+
install-so: $(DLLIB) $(TIMESTAMP_DIR)/.RUBYARCHDIR.time
|
186
|
+
$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
|
187
|
+
clean-static::
|
188
|
+
-$(Q)$(RM) $(STATIC_LIB)
|
189
|
+
install-rb: pre-install-rb install-rb-default
|
190
|
+
install-rb-default: pre-install-rb-default
|
191
|
+
pre-install-rb: Makefile
|
192
|
+
pre-install-rb-default: Makefile
|
193
|
+
pre-install-rb-default:
|
194
|
+
@$(NULLCMD)
|
195
|
+
$(TIMESTAMP_DIR)/.RUBYARCHDIR.time:
|
196
|
+
$(Q) $(MAKEDIRS) $(@D) $(RUBYARCHDIR)
|
197
|
+
$(Q) $(TOUCH) $@
|
198
|
+
|
199
|
+
site-install: site-install-so site-install-rb
|
200
|
+
site-install-so: install-so
|
201
|
+
site-install-rb: install-rb
|
202
|
+
|
203
|
+
.SUFFIXES: .c .m .cc .mm .cxx .cpp .o .S
|
204
|
+
|
205
|
+
.cc.o:
|
206
|
+
$(ECHO) compiling $(<)
|
207
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
208
|
+
|
209
|
+
.cc.S:
|
210
|
+
$(ECHO) translating $(<)
|
211
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $<
|
212
|
+
|
213
|
+
.mm.o:
|
214
|
+
$(ECHO) compiling $(<)
|
215
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
216
|
+
|
217
|
+
.mm.S:
|
218
|
+
$(ECHO) translating $(<)
|
219
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $<
|
220
|
+
|
221
|
+
.cxx.o:
|
222
|
+
$(ECHO) compiling $(<)
|
223
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
224
|
+
|
225
|
+
.cxx.S:
|
226
|
+
$(ECHO) translating $(<)
|
227
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $<
|
228
|
+
|
229
|
+
.cpp.o:
|
230
|
+
$(ECHO) compiling $(<)
|
231
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
232
|
+
|
233
|
+
.cpp.S:
|
234
|
+
$(ECHO) translating $(<)
|
235
|
+
$(Q) $(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -S $<
|
236
|
+
|
237
|
+
.c.o:
|
238
|
+
$(ECHO) compiling $(<)
|
239
|
+
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $<
|
240
|
+
|
241
|
+
.c.S:
|
242
|
+
$(ECHO) translating $(<)
|
243
|
+
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $<
|
244
|
+
|
245
|
+
.m.o:
|
246
|
+
$(ECHO) compiling $(<)
|
247
|
+
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $<
|
248
|
+
|
249
|
+
.m.S:
|
250
|
+
$(ECHO) translating $(<)
|
251
|
+
$(Q) $(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -S $<
|
252
|
+
|
253
|
+
$(DLLIB): $(OBJS) Makefile
|
254
|
+
$(ECHO) linking shared-object $(DLLIB)
|
255
|
+
-$(Q)$(RM) $(@)
|
256
|
+
$(Q) $(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
|
257
|
+
|
258
|
+
|
259
|
+
|
260
|
+
$(OBJS): $(HDRS) $(ruby_headers)
|
data/ext/c_knn/knn.c
ADDED
@@ -0,0 +1,250 @@
|
|
1
|
+
/*
|
2
|
+
Native implementation of kNN leave-one-out cross validation for Ruby
|
3
|
+
David Charte (C) 2016
|
4
|
+
|
5
|
+
This program is free software: you can redistribute it and/or modify
|
6
|
+
it under the terms of the GNU General Public License as published by
|
7
|
+
the Free Software Foundation, either version 3 of the License, or
|
8
|
+
(at your option) any later version.
|
9
|
+
|
10
|
+
This program is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
GNU General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU General Public License
|
16
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
|
18
|
+
Based on the following work:
|
19
|
+
* class/src/class.c by W. N. Venables and B. D. Ripley Copyright (C) 1994-2002 (GPLv2)
|
20
|
+
*/
|
21
|
+
|
22
|
+
#include <ruby.h>
|
23
|
+
#include <math.h>
|
24
|
+
#include <float.h>
|
25
|
+
|
26
|
+
#define EPS 1e-4 /* relative test of equality of distances */
|
27
|
+
#define MAX_TIES 1000
|
28
|
+
/* Not worth doing this dynamically -- limits k + # ties + fence, in fact */
|
29
|
+
|
30
|
+
/* Definitions for Ruby */
|
31
|
+
VALUE KnnCv = Qnil;
|
32
|
+
VALUE Classifier = Qnil;
|
33
|
+
|
34
|
+
static VALUE C_instances;
|
35
|
+
static VALUE C_classes;
|
36
|
+
static VALUE C_numerics;
|
37
|
+
|
38
|
+
static double FLOAT_MAX;
|
39
|
+
|
40
|
+
void c_knn_free(int* data) {
|
41
|
+
free(data);
|
42
|
+
}
|
43
|
+
|
44
|
+
VALUE method_c_knn_leaveoneout(VALUE self, VALUE rb_features) {
|
45
|
+
double * instances = NULL;
|
46
|
+
int * classes = NULL;
|
47
|
+
int * which_numeric = NULL;
|
48
|
+
|
49
|
+
Data_Get_Struct(rb_iv_get(self, "@instances"), double, instances);
|
50
|
+
Data_Get_Struct(rb_iv_get(self, "@classes"), int, classes);
|
51
|
+
Data_Get_Struct(rb_iv_get(self, "@which_numeric"), int, which_numeric);
|
52
|
+
|
53
|
+
int nrow = NUM2INT(rb_iv_get(self, "@nrow"));
|
54
|
+
int ncol = NUM2INT(rb_iv_get(self, "@ncol"));
|
55
|
+
int num_neighbors = NUM2INT(rb_iv_get(self, "@num_neighbors"));
|
56
|
+
int class_count = NUM2INT(rb_iv_get(self, "@nclass"));
|
57
|
+
|
58
|
+
rb_features = rb_funcall(rb_features, rb_intern("to_a"), 0);
|
59
|
+
int correct_guesses;
|
60
|
+
double fitness;
|
61
|
+
|
62
|
+
|
63
|
+
/* The following is code based on the "class" package from R */
|
64
|
+
/***************************************************************
|
65
|
+
VR_knn input parameters:
|
66
|
+
Sint *kin, Sint *lin, Sint *pntr, Sint *pnte, Sint *p,
|
67
|
+
double *train, Sint *class, double *test, Sint *res, double *pr,
|
68
|
+
Sint *votes, Sint *nc, Sint *cv, Sint *use_all
|
69
|
+
***************************************************************/
|
70
|
+
int i, index, j, k, k1, kinit = num_neighbors, kn, l = 0, mm, npat, ntie, extras;
|
71
|
+
int pos[MAX_TIES];
|
72
|
+
double dist, tmp, nndist[MAX_TIES];
|
73
|
+
|
74
|
+
// Prediction results
|
75
|
+
int * res = (int*) malloc(sizeof(int) * nrow);
|
76
|
+
int * votes = (int*) malloc(sizeof(int) * class_count);
|
77
|
+
|
78
|
+
/*
|
79
|
+
Use a 'fence' in the (k+1)st position to avoid special cases.
|
80
|
+
Simple insertion sort will suffice since k will be small.
|
81
|
+
*/
|
82
|
+
|
83
|
+
for (npat = 0; npat < nrow; npat++) {
|
84
|
+
kn = kinit;
|
85
|
+
|
86
|
+
for (k = 0; k < kn; k++)
|
87
|
+
nndist[k] = 0.99 * FLOAT_MAX;
|
88
|
+
|
89
|
+
for (j = 0; j < nrow; j++) {
|
90
|
+
if (j == npat) // Skip own instance for leave-one-out cross_validation
|
91
|
+
continue;
|
92
|
+
|
93
|
+
dist = 0.0;
|
94
|
+
|
95
|
+
for (k = 0; k < ncol; k++) {
|
96
|
+
// Skip unselected features
|
97
|
+
if (NUM2INT(rb_ary_entry(rb_features, k))) {
|
98
|
+
// Distinguish numeric attributes from nominal
|
99
|
+
tmp = instances[npat * ncol + k] - instances[j * ncol + k];
|
100
|
+
|
101
|
+
if (which_numeric[k]) {
|
102
|
+
dist += tmp * tmp;
|
103
|
+
} else if (tmp >= EPS || tmp <= -EPS) { // Nominal feature: values differ when |tmp| >= EPS
|
104
|
+
// Add 1 if values are different
|
105
|
+
dist += 1;
|
106
|
+
}
|
107
|
+
}
|
108
|
+
}
|
109
|
+
|
110
|
+
/* Use 'fuzz' since distance computed could depend on order of coordinates */
|
111
|
+
if (dist <= nndist[kinit - 1] * (1 + EPS))
|
112
|
+
for (k = 0; k <= kn; k++)
|
113
|
+
if (dist < nndist[k]) {
|
114
|
+
for (k1 = kn; k1 > k; k1--) {
|
115
|
+
nndist[k1] = nndist[k1 - 1];
|
116
|
+
pos[k1] = pos[k1 - 1];
|
117
|
+
}
|
118
|
+
nndist[k] = dist;
|
119
|
+
pos[k] = j;
|
120
|
+
|
121
|
+
/* Keep an extra distance if the largest current one ties with current kth */
|
122
|
+
if (nndist[kn] <= nndist[kinit - 1])
|
123
|
+
if (++kn == MAX_TIES - 1)
|
124
|
+
return rb_float_new(-2.0); // Too many ties. Fail
|
125
|
+
break;
|
126
|
+
}
|
127
|
+
|
128
|
+
nndist[kn] = 0.99 * FLOAT_MAX;
|
129
|
+
}
|
130
|
+
|
131
|
+
for (j = 0; j < class_count; j++)
|
132
|
+
votes[j] = 0;
|
133
|
+
|
134
|
+
// use_all is true always so unneeded code has been removed
|
135
|
+
for (j = 0; j < kinit; j++){
|
136
|
+
votes[classes[pos[j]]]++;
|
137
|
+
}
|
138
|
+
extras = 0;
|
139
|
+
|
140
|
+
for (j = kinit; j < kn; j++) {
|
141
|
+
if (nndist[j] > nndist[kinit - 1] * (1 + EPS))
|
142
|
+
break;
|
143
|
+
|
144
|
+
extras++;
|
145
|
+
votes[classes[pos[j]]]++;
|
146
|
+
}
|
147
|
+
|
148
|
+
/* Use reservoir sampling to choose amongst the tied votes */
|
149
|
+
ntie = 1;
|
150
|
+
|
151
|
+
mm = votes[0];
|
152
|
+
index = 0;
|
153
|
+
|
154
|
+
for (i = 1; i < class_count; i++)
|
155
|
+
if (votes[i] > mm) {
|
156
|
+
ntie = 1;
|
157
|
+
index = i;
|
158
|
+
mm = votes[i];
|
159
|
+
} else if (votes[i] == mm && votes[i] >= l) {
|
160
|
+
// This line is causing segfaults:
|
161
|
+
//if (++ntie * NUM2DBL(rb_funcall(rb_random, rb_intern("rand"), 0)) < 1.0)
|
162
|
+
if (++ntie * NUM2DBL(rb_funcall(rb_iv_get(self, "@rng"), rb_intern("rand"), 0)) < 1.0)
|
163
|
+
index = i;
|
164
|
+
}
|
165
|
+
|
166
|
+
res[npat] = index;
|
167
|
+
//pr[npat] = (double) mm / (kinit + extras);
|
168
|
+
}
|
169
|
+
/* end of "class" code */
|
170
|
+
|
171
|
+
free(votes);
|
172
|
+
|
173
|
+
correct_guesses = 0;
|
174
|
+
|
175
|
+
for (npat = 0; npat < nrow; npat++) {
|
176
|
+
// Count correct guesses
|
177
|
+
correct_guesses += res[npat] == classes[npat];
|
178
|
+
}
|
179
|
+
|
180
|
+
free(res);
|
181
|
+
|
182
|
+
fitness = (double)(correct_guesses) / (double)(nrow);
|
183
|
+
|
184
|
+
return rb_float_new(fitness);
|
185
|
+
}
|
186
|
+
|
187
|
+
VALUE method_c_knn_initialize(VALUE self, VALUE rb_k, VALUE rb_dataset, VALUE rb_random_par) {
|
188
|
+
int ncol, nrow;
|
189
|
+
|
190
|
+
double * instances = NULL;
|
191
|
+
int * classes = NULL;
|
192
|
+
int * which_numeric = NULL;
|
193
|
+
|
194
|
+
VALUE data = rb_funcall(rb_dataset, rb_intern("instances"), 0);
|
195
|
+
VALUE rb_class = rb_funcall(rb_dataset, rb_intern("classes"), 0);
|
196
|
+
VALUE rb_numeric = rb_funcall(rb_dataset, rb_intern("numeric_attrs"), 0);
|
197
|
+
|
198
|
+
// Define global variables
|
199
|
+
rb_iv_set(self, "@num_neighbors", rb_k);
|
200
|
+
nrow = RARRAY_LEN(data);
|
201
|
+
rb_iv_set(self, "@nrow", INT2NUM(nrow));
|
202
|
+
ncol = RARRAY_LEN(rb_ary_entry(data, 0));
|
203
|
+
rb_iv_set(self, "@ncol", INT2NUM(ncol));
|
204
|
+
rb_iv_set(self, "@nclass", rb_funcall(rb_dataset, rb_intern("class_count"), 0));
|
205
|
+
FLOAT_MAX = DBL_MAX; /* same value as Ruby's Float::MAX; rb_intern() yields an ID, which NUM2DBL cannot convert */
|
206
|
+
rb_iv_set(self, "@rng", rb_random_par);
|
207
|
+
|
208
|
+
instances = (double*) malloc(sizeof(double) * nrow * ncol);
|
209
|
+
|
210
|
+
int i, j;
|
211
|
+
for (i = 0; i < nrow; i++) {
|
212
|
+
for (j = 0; j < ncol; j++) {
|
213
|
+
if (TYPE(rb_ary_entry(rb_ary_entry(data, i), j)) == T_STRING) {
|
214
|
+
rb_raise(rb_eStandardError, "A string was found within the dataset. Aborting...");
|
215
|
+
} else
|
216
|
+
instances[i * ncol + j] = NUM2DBL(rb_ary_entry(rb_ary_entry(data, i), j));
|
217
|
+
}
|
218
|
+
}
|
219
|
+
|
220
|
+
classes = (int*) malloc(sizeof(int) * nrow);
|
221
|
+
|
222
|
+
for (i = 0; i < nrow; i++) {
|
223
|
+
classes[i] = NUM2INT(rb_ary_entry(rb_class, i));
|
224
|
+
}
|
225
|
+
|
226
|
+
which_numeric = (int*) malloc(sizeof(int) * ncol);
|
227
|
+
|
228
|
+
for (j = 0; j < ncol; j++) {
|
229
|
+
which_numeric[j] = NUM2INT(rb_ary_entry(rb_numeric, j));
|
230
|
+
}
|
231
|
+
|
232
|
+
rb_iv_set(self, "@instances", Data_Wrap_Struct(C_instances, NULL, c_knn_free, instances));
|
233
|
+
rb_iv_set(self, "@classes", Data_Wrap_Struct(C_classes, NULL, c_knn_free, classes));
|
234
|
+
rb_iv_set(self, "@which_numeric", Data_Wrap_Struct(C_numerics, NULL, c_knn_free, which_numeric));
|
235
|
+
|
236
|
+
return self;
|
237
|
+
}
|
238
|
+
|
239
|
+
void Init_c_knn(void) {
|
240
|
+
KnnCv = rb_const_get(rb_cObject, rb_intern("KnnCv"));
|
241
|
+
Classifier = rb_define_class_under(KnnCv, "Classifier", rb_cObject);
|
242
|
+
|
243
|
+
/* Wrapper classes */
|
244
|
+
C_instances = rb_define_class_under(Classifier, "Instances", rb_cObject);
|
245
|
+
C_classes = rb_define_class_under(Classifier, "Classes", rb_cObject);
|
246
|
+
C_numerics = rb_define_class_under(Classifier, "Numerics", rb_cObject);
|
247
|
+
|
248
|
+
rb_define_method(Classifier, "initialize", method_c_knn_initialize, 3);
|
249
|
+
rb_define_method(Classifier, "fitness_for", method_c_knn_leaveoneout, 1);
|
250
|
+
}
|
data/knn_cv.gemspec
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'knn_cv/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "knn_cv"
|
8
|
+
spec.version = KnnCv::VERSION
|
9
|
+
spec.authors = ["David Charte"]
|
10
|
+
spec.email = ["fdavidcl@outlook.com"]
|
11
|
+
|
12
|
+
spec.summary = %q{A native kNN leave-one-out technique implementation for Ruby based on the 'class' package for R}
|
13
|
+
spec.description = %q{A native kNN leave-one-out technique implementation for Ruby based on the 'class' package for R}
|
14
|
+
spec.homepage = "https://github.com/fdavidcl/ruby-knn_cv"
|
15
|
+
|
16
|
+
# Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
|
17
|
+
# delete this section to allow pushing this gem to any host.
|
18
|
+
# if spec.respond_to?(:metadata)
|
19
|
+
# spec.metadata['allowed_push_host'] = "TODO: Set to 'http://mygemserver.com'"
|
20
|
+
# else
|
21
|
+
# raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
|
22
|
+
# end
|
23
|
+
|
24
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
25
|
+
spec.bindir = "exe"
|
26
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
27
|
+
spec.require_paths = ["lib"]
|
28
|
+
spec.extensions << "ext/c_knn/extconf.rb"
|
29
|
+
|
30
|
+
spec.add_development_dependency "bundler", "~> 1.11"
|
31
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
32
|
+
spec.add_development_dependency "minitest", "~> 5.0"
|
33
|
+
spec.add_development_dependency "rake-compiler"
|
34
|
+
end
|