thera 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +56 -0
- data/Gemfile +2 -0
- data/Gemfile.lock +20 -0
- data/LICENSE.txt +1 -0
- data/README.rdoc +8 -0
- data/Rakefile +1 -0
- data/ext/Makefile +225 -0
- data/ext/extconf.rb +29 -0
- data/ext/quarry/quarry_toolkit.cpp +148 -0
- data/lib/quarry/Makefile.linux +2 -0
- data/lib/quarry/Makefile.osx +6 -0
- data/lib/quarry/Makefile.targets +23 -0
- data/lib/quarry/obj/.gitkeep +0 -0
- data/lib/quarry/src/classifier/aode/aode_classifier.cpp +0 -0
- data/lib/quarry/src/classifier/aode/aode_classifier.h +0 -0
- data/lib/quarry/src/classifier/centroid/centroid_classifier.cpp +0 -0
- data/lib/quarry/src/classifier/centroid/centroid_classifier.h +0 -0
- data/lib/quarry/src/classifier/classifier.cpp +32 -0
- data/lib/quarry/src/classifier/classifier.h +59 -0
- data/lib/quarry/src/classifier/knn/knn_classifier.cpp +0 -0
- data/lib/quarry/src/classifier/knn/knn_classifier.h +0 -0
- data/lib/quarry/src/classifier/multinomial_bayes/multinomial_bayes_classifier.cpp +40 -0
- data/lib/quarry/src/classifier/multinomial_bayes/multinomial_bayes_classifier.h +18 -0
- data/lib/quarry/src/classifier/naive_bayes/naive_bayes_classifier.cpp +80 -0
- data/lib/quarry/src/classifier/naive_bayes/naive_bayes_classifier.h +52 -0
- data/lib/quarry/src/data_set/data_set.cpp +130 -0
- data/lib/quarry/src/data_set/data_set.h +78 -0
- data/lib/quarry/src/data_set/dense/dense_data_set.h +39 -0
- data/lib/quarry/src/data_set/dense/dense_example.h +44 -0
- data/lib/quarry/src/data_set/example.cpp +10 -0
- data/lib/quarry/src/data_set/example.h +23 -0
- data/lib/quarry/src/data_set/feature.h +36 -0
- data/lib/quarry/src/data_set/features/nominal_feature.cpp +57 -0
- data/lib/quarry/src/data_set/features/nominal_feature.h +76 -0
- data/lib/quarry/src/data_set/features/numeric_feature.cpp +69 -0
- data/lib/quarry/src/data_set/features/numeric_feature.h +78 -0
- data/lib/quarry/src/data_set/sparse/sparse_data_set.h +40 -0
- data/lib/quarry/src/data_set/sparse/sparse_example.cpp +82 -0
- data/lib/quarry/src/data_set/sparse/sparse_example.h +38 -0
- data/lib/quarry/src/metrics/confusion_matrix.cpp +129 -0
- data/lib/quarry/src/metrics/confusion_matrix.h +82 -0
- data/lib/quarry/src/model/model.cpp +29 -0
- data/lib/quarry/src/model/model.h +50 -0
- data/lib/quarry/src/preprocessing/examples/example_preprocessor.h +20 -0
- data/lib/quarry/src/preprocessing/examples/weights/binary_weight.h +20 -0
- data/lib/quarry/src/preprocessing/examples/weights/local_weight.h +29 -0
- data/lib/quarry/src/preprocessing/text/example_generator/example_generator.h +19 -0
- data/lib/quarry/src/preprocessing/text/example_generator/token_counter.h +59 -0
- data/lib/quarry/src/preprocessing/text/inplace_processor/downcase.h +26 -0
- data/lib/quarry/src/preprocessing/text/inplace_processor/inplace_processor.h +17 -0
- data/lib/quarry/src/preprocessing/text/inplace_processor/porter_stemmer.h +44 -0
- data/lib/quarry/src/preprocessing/text/inplace_processor/porter_stemmer_original.cpp +375 -0
- data/lib/quarry/src/preprocessing/text/text_pipeline.cpp +29 -0
- data/lib/quarry/src/preprocessing/text/text_pipeline.h +37 -0
- data/lib/quarry/src/preprocessing/text/token_selector/pos_tag_selector.h +21 -0
- data/lib/quarry/src/preprocessing/text/token_selector/stop_words.cpp +82 -0
- data/lib/quarry/src/preprocessing/text/token_selector/stop_words.h +20 -0
- data/lib/quarry/src/preprocessing/text/token_selector/token_selector.h +17 -0
- data/lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.cpp +29 -0
- data/lib/quarry/src/preprocessing/text/tokeniser/simple_tokeniser.h +20 -0
- data/lib/quarry/src/preprocessing/text/tokeniser/tokeniser.h +19 -0
- data/lib/quarry/src/quarry.cpp +1 -0
- data/lib/quarry/src/quarry.h +29 -0
- data/lib/quarry/src/storage/arff.cpp +198 -0
- data/lib/quarry/src/storage/arff.h +26 -0
- data/lib/quarry/src/storage/binary.cpp +457 -0
- data/lib/quarry/src/storage/binary.h +79 -0
- data/lib/quarry/src/storage/folders.cpp +98 -0
- data/lib/quarry/src/storage/folders.h +25 -0
- data/lib/quarry/src/storage/storage.h +19 -0
- data/lib/quarry/src/test.cpp +6 -0
- data/lib/quarry_rb/classifier/classifier.rb +22 -0
- data/lib/quarry_rb/classifier/naive_bayes_classifier.rb +10 -0
- data/lib/quarry_rb/confusion_matrix.rb +58 -0
- data/lib/quarry_rb/data_set/data_set.rb +42 -0
- data/lib/quarry_rb/data_set/example.rb +33 -0
- data/lib/quarry_rb/data_set/feature.rb +28 -0
- data/lib/quarry_rb/enumerable_helper.rb +32 -0
- data/lib/quarry_rb/model/model.rb +56 -0
- data/lib/quarry_rb/storage/arff.rb +11 -0
- data/lib/quarry_rb/storage/binary.rb +23 -0
- data/lib/quarry_rb/storage/folders.rb +11 -0
- data/lib/quarry_rb/text_pipeline.rb +16 -0
- data/lib/thera.rb +20 -0
- data/test/helper.rb +19 -0
- data/test/test_quarry.rb +33 -0
- data/thera.gemspec +21 -0
- metadata +148 -0
data/.document
ADDED
data/.gitignore
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
*.bundle
|
2
|
+
*.o
|
3
|
+
*.a
|
4
|
+
output
|
5
|
+
Makefile
|
6
|
+
obj/
|
7
|
+
mkmf.log
|
8
|
+
|
9
|
+
# rcov generated
|
10
|
+
coverage
|
11
|
+
|
12
|
+
# rdoc generated
|
13
|
+
rdoc
|
14
|
+
|
15
|
+
# yard generated
|
16
|
+
doc
|
17
|
+
.yardoc
|
18
|
+
|
19
|
+
# bundler
|
20
|
+
.bundle
|
21
|
+
|
22
|
+
# jeweler generated
|
23
|
+
pkg
|
24
|
+
|
25
|
+
# Have editor/IDE/OS specific files you need to ignore? Consider using a global gitignore:
|
26
|
+
#
|
27
|
+
# * Create a file at ~/.gitignore
|
28
|
+
# * Include files you want ignored
|
29
|
+
# * Run: git config --global core.excludesfile ~/.gitignore
|
30
|
+
#
|
31
|
+
# After doing this, these files will be ignored in all your git projects,
|
32
|
+
# saving you from having to 'pollute' every project you touch with them
|
33
|
+
#
|
34
|
+
# Not sure what needs to be ignored for particular editors/OSes? Here are some ideas to get you started. (Remember to remove the leading # from the line.)
|
35
|
+
#
|
36
|
+
# For MacOS:
|
37
|
+
#
|
38
|
+
#.DS_Store
|
39
|
+
|
40
|
+
# For TextMate
|
41
|
+
#*.tmproj
|
42
|
+
#tmtags
|
43
|
+
|
44
|
+
# For emacs:
|
45
|
+
#*~
|
46
|
+
#\#*
|
47
|
+
#.\#*
|
48
|
+
|
49
|
+
# For vim:
|
50
|
+
#*.swp
|
51
|
+
|
52
|
+
# For redcar:
|
53
|
+
#.redcar
|
54
|
+
|
55
|
+
# For rubinius:
|
56
|
+
#*.rbc
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
GEM
|
2
|
+
remote: http://rubygems.org/
|
3
|
+
specs:
|
4
|
+
git (1.2.5)
|
5
|
+
jeweler (1.6.4)
|
6
|
+
bundler (~> 1.0)
|
7
|
+
git (>= 1.2.5)
|
8
|
+
rake
|
9
|
+
rake (0.9.2)
|
10
|
+
rcov (0.9.10)
|
11
|
+
shoulda (2.11.3)
|
12
|
+
|
13
|
+
PLATFORMS
|
14
|
+
ruby
|
15
|
+
|
16
|
+
DEPENDENCIES
|
17
|
+
bundler (~> 1.0.0)
|
18
|
+
jeweler (~> 1.6.4)
|
19
|
+
rcov
|
20
|
+
shoulda
|
data/LICENSE.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Public Domain
|
data/README.rdoc
ADDED
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require 'bundler/gem_tasks'
|
data/ext/Makefile
ADDED
@@ -0,0 +1,225 @@
|
|
1
|
+
|
2
|
+
SHELL = /bin/sh
|
3
|
+
|
4
|
+
#### Start of system configuration section. ####
|
5
|
+
|
6
|
+
srcdir = quarry
|
7
|
+
topdir = /Users/will/.rvm/rubies/ruby-1.9.2-p290/include/ruby-1.9.1
|
8
|
+
hdrdir = /Users/will/.rvm/rubies/ruby-1.9.2-p290/include/ruby-1.9.1
|
9
|
+
arch_hdrdir = /Users/will/.rvm/rubies/ruby-1.9.2-p290/include/ruby-1.9.1/$(arch)
|
10
|
+
VPATH = $(srcdir):$(arch_hdrdir)/ruby:$(hdrdir)/ruby
|
11
|
+
|
12
|
+
prefix = $(DESTDIR)/Users/will/.rvm/rubies/ruby-1.9.2-p290
|
13
|
+
|
14
|
+
rubylibprefix = $(libdir)/$(RUBY_BASE_NAME)
|
15
|
+
|
16
|
+
exec_prefix = $(prefix)
|
17
|
+
|
18
|
+
vendorhdrdir = $(rubyhdrdir)/vendor_ruby
|
19
|
+
|
20
|
+
sitehdrdir = $(rubyhdrdir)/site_ruby
|
21
|
+
|
22
|
+
rubyhdrdir = $(includedir)/$(RUBY_BASE_NAME)-$(ruby_version)
|
23
|
+
|
24
|
+
vendordir = $(rubylibprefix)/vendor_ruby
|
25
|
+
|
26
|
+
sitedir = $(rubylibprefix)/site_ruby
|
27
|
+
|
28
|
+
ridir = $(datarootdir)/$(RI_BASE_NAME)
|
29
|
+
|
30
|
+
mandir = $(datarootdir)/man
|
31
|
+
|
32
|
+
localedir = $(datarootdir)/locale
|
33
|
+
|
34
|
+
libdir = $(exec_prefix)/lib
|
35
|
+
|
36
|
+
psdir = $(docdir)
|
37
|
+
|
38
|
+
pdfdir = $(docdir)
|
39
|
+
|
40
|
+
dvidir = $(docdir)
|
41
|
+
|
42
|
+
htmldir = $(docdir)
|
43
|
+
|
44
|
+
infodir = $(datarootdir)/info
|
45
|
+
|
46
|
+
docdir = $(datarootdir)/doc/$(PACKAGE)
|
47
|
+
|
48
|
+
oldincludedir = $(DESTDIR)/usr/include
|
49
|
+
|
50
|
+
includedir = $(prefix)/include
|
51
|
+
|
52
|
+
localstatedir = $(prefix)/var
|
53
|
+
|
54
|
+
sharedstatedir = $(prefix)/com
|
55
|
+
|
56
|
+
sysconfdir = $(prefix)/etc
|
57
|
+
|
58
|
+
datadir = $(datarootdir)
|
59
|
+
|
60
|
+
datarootdir = $(prefix)/share
|
61
|
+
|
62
|
+
libexecdir = $(exec_prefix)/libexec
|
63
|
+
|
64
|
+
sbindir = $(exec_prefix)/sbin
|
65
|
+
|
66
|
+
bindir = $(exec_prefix)/bin
|
67
|
+
|
68
|
+
rubylibdir = $(rubylibprefix)/$(ruby_version)
|
69
|
+
|
70
|
+
archdir = $(rubylibdir)/$(arch)
|
71
|
+
|
72
|
+
sitelibdir = $(sitedir)/$(ruby_version)
|
73
|
+
|
74
|
+
sitearchdir = $(sitelibdir)/$(sitearch)
|
75
|
+
|
76
|
+
vendorlibdir = $(vendordir)/$(ruby_version)
|
77
|
+
|
78
|
+
vendorarchdir = $(vendorlibdir)/$(sitearch)
|
79
|
+
|
80
|
+
|
81
|
+
CC = /usr/bin/gcc-4.2
|
82
|
+
CXX = g++
|
83
|
+
LIBRUBY = $(LIBRUBY_SO)
|
84
|
+
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
|
85
|
+
LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
|
86
|
+
LIBRUBYARG_STATIC = -lruby.1.9.1-static
|
87
|
+
OUTFLAG = -o
|
88
|
+
COUTFLAG = -o
|
89
|
+
|
90
|
+
RUBY_EXTCONF_H =
|
91
|
+
cflags = $(optflags) $(debugflags) $(warnflags)
|
92
|
+
optflags = -O3
|
93
|
+
debugflags = -ggdb
|
94
|
+
warnflags = -Wextra -Wno-unused-parameter -Wno-parentheses -Wpointer-arith -Wwrite-strings -Wno-missing-field-initializers -Wshorten-64-to-32 -Wno-long-long
|
95
|
+
CFLAGS = -fno-common -x c++
|
96
|
+
INCFLAGS = -I. -I$(arch_hdrdir) -I$(hdrdir)/ruby/backward -I$(hdrdir) -I$(srcdir) -I./../lib/quarry/src
|
97
|
+
DEFS =
|
98
|
+
CPPFLAGS = -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE $(DEFS) $(cppflags) -I/Users/will/.rvm/gems/ruby-1.9.2-p290/gems/rice-1.4.3/ruby/lib/include
|
99
|
+
CXXFLAGS = $(CFLAGS) -Wall -g
|
100
|
+
ldflags = -L. -L/usr/local/lib -L/Users/will/.rvm/gems/ruby-1.9.2-p290/gems/rice-1.4.3/ruby/lib/lib
|
101
|
+
dldflags = -Wl,-undefined,dynamic_lookup -Wl,-multiply_defined,suppress -Wl,-flat_namespace
|
102
|
+
ARCH_FLAG =
|
103
|
+
DLDFLAGS = $(ldflags) $(dldflags)
|
104
|
+
LDSHARED = g++ -dynamic -bundle
|
105
|
+
LDSHAREDXX = $(CXX) -dynamic -bundle
|
106
|
+
AR = ar
|
107
|
+
EXEEXT =
|
108
|
+
|
109
|
+
RUBY_BASE_NAME = ruby
|
110
|
+
RUBY_INSTALL_NAME = ruby
|
111
|
+
RUBY_SO_NAME = ruby.1.9.1
|
112
|
+
arch = x86_64-darwin11.1.0
|
113
|
+
sitearch = $(arch)
|
114
|
+
ruby_version = 1.9.1
|
115
|
+
ruby = /Users/will/.rvm/rubies/ruby-1.9.2-p290/bin/ruby
|
116
|
+
RUBY = $(ruby)
|
117
|
+
RM = rm -f
|
118
|
+
RM_RF = $(RUBY) -run -e rm -- -rf
|
119
|
+
RMDIRS = $(RUBY) -run -e rmdir -- -p
|
120
|
+
MAKEDIRS = mkdir -p
|
121
|
+
INSTALL = /usr/bin/install -c
|
122
|
+
INSTALL_PROG = $(INSTALL) -m 0755
|
123
|
+
INSTALL_DATA = $(INSTALL) -m 644
|
124
|
+
COPY = cp
|
125
|
+
|
126
|
+
#### End of system configuration section. ####
|
127
|
+
|
128
|
+
preload =
|
129
|
+
|
130
|
+
|
131
|
+
CXX = g++
|
132
|
+
|
133
|
+
libpath = . $(libdir) ./../lib/quarry/obj
|
134
|
+
LIBPATH = -L. -L$(libdir) -L./../lib/quarry/obj
|
135
|
+
DEFFILE =
|
136
|
+
|
137
|
+
CLEANFILES = mkmf.log
|
138
|
+
DISTCLEANFILES =
|
139
|
+
DISTCLEANDIRS =
|
140
|
+
|
141
|
+
extout =
|
142
|
+
extout_prefix =
|
143
|
+
target_prefix =
|
144
|
+
LOCAL_LIBS =
|
145
|
+
LIBS = -lquarry -lrice -lruby.1.9.1 -lpthread -ldl -lobjc
|
146
|
+
SRCS = quarry_toolkit.cpp
|
147
|
+
OBJS = quarry_toolkit.o
|
148
|
+
TARGET = quarry_toolkit
|
149
|
+
DLLIB = $(TARGET).bundle
|
150
|
+
EXTSTATIC =
|
151
|
+
STATIC_LIB =
|
152
|
+
|
153
|
+
BINDIR = $(bindir)
|
154
|
+
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
155
|
+
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
|
156
|
+
RUBYARCHDIR = $(sitearchdir)$(target_prefix)
|
157
|
+
HDRDIR = $(rubyhdrdir)/ruby$(target_prefix)
|
158
|
+
ARCHHDRDIR = $(rubyhdrdir)/$(arch)/ruby$(target_prefix)
|
159
|
+
|
160
|
+
TARGET_SO = $(DLLIB)
|
161
|
+
CLEANLIBS = $(TARGET).bundle
|
162
|
+
CLEANOBJS = *.o *.bak
|
163
|
+
|
164
|
+
all: $(DLLIB)
|
165
|
+
static: $(STATIC_LIB)
|
166
|
+
.PHONY: all install static install-so install-rb
|
167
|
+
.PHONY: clean clean-so clean-rb
|
168
|
+
|
169
|
+
clean-rb-default::
|
170
|
+
clean-rb::
|
171
|
+
clean-so::
|
172
|
+
clean: clean-so clean-rb-default clean-rb
|
173
|
+
@-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
|
174
|
+
|
175
|
+
distclean-rb-default::
|
176
|
+
distclean-rb::
|
177
|
+
distclean-so::
|
178
|
+
distclean: clean distclean-so distclean-rb-default distclean-rb
|
179
|
+
@-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
180
|
+
@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
|
181
|
+
@-$(RMDIRS) $(DISTCLEANDIRS)
|
182
|
+
|
183
|
+
realclean: distclean
|
184
|
+
install: install-so install-rb
|
185
|
+
|
186
|
+
install-so: $(RUBYARCHDIR)
|
187
|
+
install-so: $(RUBYARCHDIR)/$(DLLIB)
|
188
|
+
$(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
|
189
|
+
@-$(MAKEDIRS) $(@D)
|
190
|
+
$(INSTALL_PROG) $(DLLIB) $(@D)
|
191
|
+
install-rb: pre-install-rb install-rb-default
|
192
|
+
install-rb-default: pre-install-rb-default
|
193
|
+
pre-install-rb: Makefile
|
194
|
+
pre-install-rb-default: Makefile
|
195
|
+
$(RUBYARCHDIR):
|
196
|
+
$(MAKEDIRS) $@
|
197
|
+
|
198
|
+
site-install: site-install-so site-install-rb
|
199
|
+
site-install-so: install-so
|
200
|
+
site-install-rb: install-rb
|
201
|
+
|
202
|
+
.SUFFIXES: .c .m .cc .cxx .cpp .C .o
|
203
|
+
|
204
|
+
.cc.o:
|
205
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
206
|
+
|
207
|
+
.cxx.o:
|
208
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
209
|
+
|
210
|
+
.cpp.o:
|
211
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
212
|
+
|
213
|
+
.C.o:
|
214
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(COUTFLAG)$@ -c $<
|
215
|
+
|
216
|
+
.c.o:
|
217
|
+
$(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) $(COUTFLAG)$@ -c $<
|
218
|
+
|
219
|
+
$(DLLIB): $(OBJS) Makefile
|
220
|
+
@-$(RM) $(@)
|
221
|
+
$(LDSHAREDXX) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
|
222
|
+
|
223
|
+
|
224
|
+
|
225
|
+
$(OBJS): $(hdrdir)/ruby.h $(hdrdir)/ruby/defines.h $(arch_hdrdir)/ruby/config.h
|
data/ext/extconf.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# Build script for the quarry_toolkit extension: first compiles the bundled
# quarry C++ sources into a static library, then generates the Makefile for
# the Rice binding that links against it.
require 'mkmf-rice'

QUARRY_H = 'quarry.h'
MARKER = 'mkmf_marker'
quarry_dir = File.join(File.dirname(__FILE__), '..', 'lib', 'quarry')
quarry_obj = File.join(quarry_dir, 'obj')
quarry_src = File.join(quarry_dir, 'src')
quarry_header = File.join(quarry_src, QUARRY_H)

# compile quarry as a static lib
Dir.chdir(quarry_dir) do
  if RUBY_PLATFORM =~ /darwin/
    mode, makefile = 'OS X', 'Makefile.osx'
  else
    mode, makefile = 'linux', 'Makefile.linux'
  end

  puts "Compiling quarry (#{mode} mode)"

  # Cleaning is best-effort: there may simply be nothing to remove yet.
  system("make -f #{makefile} clean")

  # A failed library build must stop here; otherwise the problem only shows up
  # later as a confusing "library not found" error when linking the extension.
  unless system("make -f #{makefile}")
    abort "Failed to compile quarry using #{makefile}"
  end
end

# the cflags are required to make mkmf compile in c++ mode
with_cflags("-x c++") do
  find_header(QUARRY_H, quarry_src)
  $LIBPATH << quarry_obj
  have_library('quarry', MARKER, quarry_header)
  create_makefile('quarry_toolkit', 'quarry')
end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
#include "rice/Constructor.hpp"
|
2
|
+
#include "rice/Data_Type.hpp"
|
3
|
+
#include "rice/Module.hpp"
|
4
|
+
#include "rice/Array.hpp"
|
5
|
+
#include "quarry.h"
|
6
|
+
using namespace Rice;
|
7
|
+
|
8
|
+
Object model_rank(Object self, Object ex) {
|
9
|
+
Model::Model *model = from_ruby<Model::Model *>(self);
|
10
|
+
DataSet::Example *example = from_ruby<DataSet::Example *>(ex);
|
11
|
+
Array indexes;
|
12
|
+
|
13
|
+
vector<Classifier::Score> *ranks = model->rank(example);
|
14
|
+
for(unsigned int i = 0; i < ranks->size(); i++)
|
15
|
+
indexes.push(ranks->at(i).category);
|
16
|
+
|
17
|
+
delete ranks;
|
18
|
+
return indexes;
|
19
|
+
}
|
20
|
+
|
21
|
+
Object model_rank_text(Object self, Object text) {
|
22
|
+
Model::Model *model = from_ruby<Model::Model *>(self);
|
23
|
+
string example_text = from_ruby<string>(text);
|
24
|
+
Array indexes;
|
25
|
+
|
26
|
+
vector<Classifier::Score> *ranks = model->rank_text(example_text);
|
27
|
+
for(unsigned int i = 0; i < ranks->size(); i++)
|
28
|
+
indexes.push(ranks->at(i).category);
|
29
|
+
|
30
|
+
delete ranks;
|
31
|
+
return indexes;
|
32
|
+
}
|
33
|
+
|
34
|
+
|
35
|
+
extern "C" {
|
36
|
+
|
37
|
+
void Init_quarry_toolkit() {
|
38
|
+
Module rb_mQuarry = define_module("Quarry");
|
39
|
+
Module rb_mDataSet = define_module_under(rb_mQuarry, "DataSet");
|
40
|
+
Module rb_mClassifier = define_module_under(rb_mQuarry, "Classifier");
|
41
|
+
Module rb_mPreprocessing = define_module_under(rb_mQuarry, "Preprocessing");
|
42
|
+
Module rb_mText = define_module_under(rb_mPreprocessing, "Text");
|
43
|
+
|
44
|
+
|
45
|
+
// text pipeline
|
46
|
+
rb_mText.define_module_function("standard_pipeline", &Preprocessing::Text::StandardPipeline);
|
47
|
+
Data_Type<Preprocessing::Text::TextPipeline> rb_cTextPipeline = define_class_under<Preprocessing::Text::TextPipeline>(rb_mQuarry, "ImplTextPipeline")
|
48
|
+
.define_constructor(Constructor<Preprocessing::Text::TextPipeline>())
|
49
|
+
.define_method("process_text", &Preprocessing::Text::TextPipeline::process_text);
|
50
|
+
|
51
|
+
// storage
|
52
|
+
Data_Type<Storage::Storage> rb_cStorage = define_class_under<Storage::Storage>(rb_mQuarry, "ImplStorage");
|
53
|
+
|
54
|
+
Data_Type<Storage::ARFF> rb_cARFF = define_class_under<Storage::ARFF, Storage::Storage>(rb_mQuarry, "ImplARFF")
|
55
|
+
.define_constructor(Constructor<Storage::ARFF, string>())
|
56
|
+
.define_method("read", &Storage::ARFF::read);
|
57
|
+
|
58
|
+
Data_Type<Storage::Binary> rb_cBinary = define_class_under<Storage::Binary, Storage::Storage>(rb_mQuarry, "ImplBinary")
|
59
|
+
.define_constructor(Constructor<Storage::Binary, string>())
|
60
|
+
.define_method("read", &Storage::Binary::read)
|
61
|
+
.define_method("write", &Storage::Binary::write)
|
62
|
+
.define_method("read_model", &Storage::Binary::read_model)
|
63
|
+
.define_method("write_model", &Storage::Binary::write_model)
|
64
|
+
.define_method("get_write_examples", &Storage::Binary::get_write_examples)
|
65
|
+
.define_method("set_write_examples", &Storage::Binary::set_write_examples);
|
66
|
+
|
67
|
+
Data_Type<Storage::Folders> rb_cFolders = define_class_under<Storage::Folders, Storage::Storage>(rb_mQuarry, "ImplFolders")
|
68
|
+
.define_constructor(Constructor<Storage::Folders, string, Preprocessing::Text::TextPipeline *>())
|
69
|
+
.define_method("read", &Storage::Folders::read);
|
70
|
+
|
71
|
+
|
72
|
+
// model
|
73
|
+
Data_Type<Model::Model> rb_cModel = define_class_under<Model::Model>(rb_mQuarry, "ImplModel")
|
74
|
+
.define_constructor(Constructor<Model::Model>())
|
75
|
+
.define_method("train", &Model::Model::train)
|
76
|
+
.define_method("train_text", &Model::Model::train_text)
|
77
|
+
.define_method("classify", &Model::Model::classify)
|
78
|
+
.define_method("classify_text", &Model::Model::classify_text)
|
79
|
+
.define_method("set_data_set", &Model::Model::set_data_set)
|
80
|
+
.define_method("get_data_set", &Model::Model::get_data_set)
|
81
|
+
.define_method("set_classifier", &Model::Model::set_classifier)
|
82
|
+
.define_method("get_classifier", &Model::Model::get_classifier)
|
83
|
+
.define_method("set_text_pipeline", &Model::Model::set_text_pipeline)
|
84
|
+
.define_method("get_text_pipeline", &Model::Model::get_text_pipeline)
|
85
|
+
.define_method("rank", &model_rank)
|
86
|
+
.define_method("rank_text", &model_rank_text);
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
// data set
|
91
|
+
Data_Type<DataSet::Feature> rb_cDataSetFeature = define_class_under<DataSet::Feature>(rb_mDataSet, "ImplFeature")
|
92
|
+
.define_method("get_name", &DataSet::Feature::get_name)
|
93
|
+
.define_method("set_name", &DataSet::Feature::set_name)
|
94
|
+
.define_constructor(Constructor<DataSet::Feature, string, int>());
|
95
|
+
|
96
|
+
Data_Type<DataSet::Example> rb_cDataSetExample = define_class_under<DataSet::Example>(rb_mDataSet, "ImplExample")
|
97
|
+
.define_method("category_index", &DataSet::Example::category_index)
|
98
|
+
.define_method("get_value", &DataSet::Example::get_value)
|
99
|
+
.define_method("set_value", &DataSet::Example::set_value)
|
100
|
+
.define_constructor(Constructor<DataSet::Example, int>());
|
101
|
+
|
102
|
+
Data_Type<DataSet::DataSet> rb_cDataSet = define_class_under<DataSet::DataSet>(rb_mDataSet, "ImplDataSet")
|
103
|
+
.define_constructor(Constructor<DataSet::DataSet>())
|
104
|
+
.define_method("get_name", &DataSet::DataSet::get_name)
|
105
|
+
.define_method("set_name", &DataSet::DataSet::set_name)
|
106
|
+
.define_method("features_size", &DataSet::DataSet::features_size)
|
107
|
+
.define_method("examples_size", &DataSet::DataSet::examples_size)
|
108
|
+
.define_method("get_feature_by_index", &DataSet::DataSet::get_feature_by_index)
|
109
|
+
.define_method("get_feature_by_name", &DataSet::DataSet::get_feature_by_name)
|
110
|
+
.define_method("get_example_by_index", &DataSet::DataSet::get_example_by_index)
|
111
|
+
.define_method("stratify", &DataSet::DataSet::stratify)
|
112
|
+
.define_method("cross_fold_validation", &DataSet::DataSet::cross_fold_validation);
|
113
|
+
|
114
|
+
|
115
|
+
// abstract classifier
|
116
|
+
Data_Type<Classifier::Classifier> rb_cClassifierClassifier = define_class_under<Classifier::Classifier>(rb_mClassifier, "ImplClassifier")
|
117
|
+
.define_method("prepare", &Classifier::Classifier::prepare)
|
118
|
+
.define_method("classify", &Classifier::Classifier::classify);
|
119
|
+
|
120
|
+
|
121
|
+
// bayesian classifiers
|
122
|
+
Data_Type<Classifier::NaiveBayesClassifier> rb_cClassifierNaiveBayesClassifier = define_class_under<Classifier::NaiveBayesClassifier, Classifier::Classifier>(rb_mClassifier, "ImplNaiveBayesClassifier")
|
123
|
+
.define_constructor(Constructor<Classifier::NaiveBayesClassifier, DataSet::DataSet *>());
|
124
|
+
|
125
|
+
|
126
|
+
// confusion matrix
|
127
|
+
Data_Type<ConfusionMatrix> rb_cConfusionMatrix = define_class_under<ConfusionMatrix>(rb_mQuarry, "ImplConfusionMatrix")
|
128
|
+
.define_constructor(Constructor<ConfusionMatrix, DataSet::DataSet *>())
|
129
|
+
.define_method("accuracy", &ConfusionMatrix::accuracy)
|
130
|
+
.define_method("error", &ConfusionMatrix::error)
|
131
|
+
.define_method("print_summary", &ConfusionMatrix::print_summary)
|
132
|
+
.define_method("avg_tp", &ConfusionMatrix::avg_tp)
|
133
|
+
.define_method("avg_fp", &ConfusionMatrix::avg_fp)
|
134
|
+
.define_method("avg_tn", &ConfusionMatrix::avg_tn)
|
135
|
+
.define_method("avg_fn", &ConfusionMatrix::avg_fn)
|
136
|
+
.define_method("avg_precision", &ConfusionMatrix::avg_precision)
|
137
|
+
.define_method("avg_recall", &ConfusionMatrix::avg_recall)
|
138
|
+
.define_method("avg_fscore", &ConfusionMatrix::avg_fscore)
|
139
|
+
.define_method("add", &ConfusionMatrix::add)
|
140
|
+
.define_method("tp", &ConfusionMatrix::tp)
|
141
|
+
.define_method("fp", &ConfusionMatrix::tp)
|
142
|
+
.define_method("tn", &ConfusionMatrix::tp)
|
143
|
+
.define_method("fn", &ConfusionMatrix::tp)
|
144
|
+
.define_method("precision", &ConfusionMatrix::tp)
|
145
|
+
.define_method("recall", &ConfusionMatrix::tp)
|
146
|
+
.define_method("fscore", &ConfusionMatrix::tp);
|
147
|
+
}
|
148
|
+
}
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Shared build rules for the quarry static library.
# STATIC_LIB (the archive command) is not defined here — presumably it is set
# by the platform makefile that pulls these rules in; verify against
# Makefile.linux / Makefile.osx.
SRCS = src/quarry.cpp src/data_set/data_set.cpp src/data_set/example.cpp src/data_set/sparse/sparse_example.cpp src/data_set/features/nominal_feature.cpp src/data_set/features/numeric_feature.cpp src/classifier/classifier.cpp src/classifier/naive_bayes/naive_bayes_classifier.cpp src/metrics/confusion_matrix.cpp src/storage/arff.cpp src/storage/folders.cpp src/storage/binary.cpp src/preprocessing/text/text_pipeline.cpp src/preprocessing/text/inplace_processor/porter_stemmer_original.cpp src/preprocessing/text/token_selector/stop_words.cpp src/preprocessing/text/tokeniser/simple_tokeniser.cpp src/model/model.cpp
OBJS = ${SRCS:.cpp=.o}
INCFLAGS = -Isrc
CPPFLAGS += -O3
LIB = libquarry.a

# None of these targets name a file that the rule creates, so mark them phony
# to keep a stray file called "all", "test" or "clean" from masking the rule.
.PHONY: all test clean

.SUFFIXES:
.SUFFIXES: .cpp .o

.cpp.o :
	$(CXX) $(INCFLAGS) $(CPPFLAGS) -o $@ -c $<

all : $(LIB)

# The archive is written to obj/, not to the target name itself.
$(LIB) : $(OBJS)
	$(STATIC_LIB) obj/$(LIB) $(OBJS)

# The source must precede the archive on the command line: order-sensitive
# linkers only pull archive members that satisfy symbols already seen.
test : $(LIB)
	$(CXX) $(INCFLAGS) $(CPPFLAGS) -o obj/test src/test.cpp obj/$(LIB)

clean :
	rm -f $(OBJS)
	rm -f obj/*
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#include "classifier.h"
|
2
|
+
|
3
|
+
// Returns the 1-based index of the category whose score() for the example is
// highest. On ties the lowest-numbered category wins. When the data set has
// no categories, 1 is returned without scoring anything.
int Classifier::Classifier::classify(DataSet::Example *example) {
  int max_category = 1;

  if(data_set->categories_size() < 1)
    return max_category;

  // Seed the running maximum with the first category's real score instead of
  // 0.0: with a fixed 0.0 floor, a classifier whose scores are all negative
  // (e.g. log-probabilities) could never beat it and category 1 always won.
  double max_score = score(max_category, example);

  for(int category = 2; category <= data_set->categories_size(); category++) {
    double category_score = score(category, example);
    if(category_score > max_score) {
      max_score = category_score;
      max_category = category;
    }
  }

  return max_category;
}
|
17
|
+
|
18
|
+
// Scores every category via score_all() and returns that same vector after
// sorting it in place with Score::compare.
vector<Classifier::Score> *Classifier::Classifier::rank(DataSet::Example *example) {
  vector<Score> *ranked = score_all(example);

  vector<Score>::iterator first = ranked->begin();
  vector<Score>::iterator last = ranked->end();
  sort(first, last, Score::compare);

  return ranked;
}
|
23
|
+
|
24
|
+
// Builds a newly allocated vector holding one Score per category, in category
// order (1 .. categories_size()). The vector is created with `new` and
// returned as a raw pointer.
vector<Classifier::Score> *Classifier::Classifier::score_all(DataSet::Example *example) {
  vector<Score> *result = new vector<Score>();
  result->reserve(data_set->categories_size());

  int category = 1;
  while(category <= data_set->categories_size()) {
    double category_score = score(category, example);
    result->push_back(Score(category, category_score));
    category++;
  }

  return result;
}
|
@@ -0,0 +1,59 @@
|
|
1
|
+
#ifndef __classifier__
|
2
|
+
#define __classifier__
|
3
|
+
#include "data_set/data_set.h"
|
4
|
+
#include <algorithm>
|
5
|
+
#include <utility>
|
6
|
+
#include <vector>
|
7
|
+
#include <iostream>
|
8
|
+
#include <typeinfo>
|
9
|
+
using namespace std;
|
10
|
+
|
11
|
+
namespace Storage {
|
12
|
+
class Binary;
|
13
|
+
}
|
14
|
+
|
15
|
+
namespace Classifier {
|
16
|
+
|
17
|
+
// Pairs a category index with the score assigned to it.
class Score {
public:
  int category;   // index of the category being scored
  double score;   // value assigned to that category

  Score(int category, double score) : category(category), score(score) {}

  // Ordering predicate: true when `a` has the strictly higher score, so that
  // sorting with it places the highest scores first.
  static bool compare (Score a, Score b) {
    const bool a_ranks_first = a.score > b.score;
    return a_ranks_first;
  }
};
|
27
|
+
|
28
|
+
|
29
|
+
class Classifier {
|
30
|
+
public:
|
31
|
+
DataSet::DataSet *data_set;
|
32
|
+
vector<bool> numeric_features;
|
33
|
+
vector<bool> nominal_features;
|
34
|
+
|
35
|
+
Classifier(DataSet::DataSet *data_set) : data_set(data_set), numeric_features(data_set->features_size(), 0), nominal_features(data_set->features_size(), 0) {
|
36
|
+
DataSet::Feature *feature = NULL;
|
37
|
+
|
38
|
+
for(unsigned int i = 0; i < data_set->features.size(); i++) {
|
39
|
+
feature = data_set->features[i];
|
40
|
+
if(typeid(*feature) == typeid(DataSet::NumericFeature))
|
41
|
+
numeric_features[i] = true;
|
42
|
+
else
|
43
|
+
nominal_features[i] = true;
|
44
|
+
}
|
45
|
+
}
|
46
|
+
|
47
|
+
virtual void prepare() {};
|
48
|
+
virtual Classifier *clone(DataSet::DataSet *new_data_set) = 0;
|
49
|
+
virtual double score(int category, DataSet::Example *example) = 0;
|
50
|
+
virtual int classify(DataSet::Example *example);
|
51
|
+
virtual vector<Score> *rank(DataSet::Example *example);
|
52
|
+
virtual vector<Score> *score_all(DataSet::Example *example);
|
53
|
+
virtual void write_binary(Storage::Binary *file) {}
|
54
|
+
virtual void read_binary(Storage::Binary *file) {}
|
55
|
+
virtual uint32_t mark() = 0;
|
56
|
+
};
|
57
|
+
}
|
58
|
+
|
59
|
+
#endif
|
File without changes
|
File without changes
|