eluka 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. data/.document +5 -0
  2. data/DOCUMENTATION_STANDARDS +39 -0
  3. data/Gemfile +13 -0
  4. data/Gemfile.lock +20 -0
  5. data/LICENSE.txt +20 -0
  6. data/README.rdoc +19 -0
  7. data/Rakefile +69 -0
  8. data/VERSION +1 -0
  9. data/examples/example.rb +59 -0
  10. data/ext/libsvm/COPYRIGHT +31 -0
  11. data/ext/libsvm/FAQ.html +1749 -0
  12. data/ext/libsvm/Makefile +25 -0
  13. data/ext/libsvm/Makefile.win +33 -0
  14. data/ext/libsvm/README +733 -0
  15. data/ext/libsvm/extconf.rb +1 -0
  16. data/ext/libsvm/heart_scale +270 -0
  17. data/ext/libsvm/java/Makefile +25 -0
  18. data/ext/libsvm/java/libsvm.jar +0 -0
  19. data/ext/libsvm/java/libsvm/svm.java +2776 -0
  20. data/ext/libsvm/java/libsvm/svm.m4 +2776 -0
  21. data/ext/libsvm/java/libsvm/svm_model.java +21 -0
  22. data/ext/libsvm/java/libsvm/svm_node.java +6 -0
  23. data/ext/libsvm/java/libsvm/svm_parameter.java +47 -0
  24. data/ext/libsvm/java/libsvm/svm_print_interface.java +5 -0
  25. data/ext/libsvm/java/libsvm/svm_problem.java +7 -0
  26. data/ext/libsvm/java/svm_predict.java +163 -0
  27. data/ext/libsvm/java/svm_scale.java +350 -0
  28. data/ext/libsvm/java/svm_toy.java +471 -0
  29. data/ext/libsvm/java/svm_train.java +318 -0
  30. data/ext/libsvm/java/test_applet.html +1 -0
  31. data/ext/libsvm/python/Makefile +4 -0
  32. data/ext/libsvm/python/README +331 -0
  33. data/ext/libsvm/python/svm.py +259 -0
  34. data/ext/libsvm/python/svmutil.py +242 -0
  35. data/ext/libsvm/svm-predict.c +226 -0
  36. data/ext/libsvm/svm-scale.c +353 -0
  37. data/ext/libsvm/svm-toy/gtk/Makefile +22 -0
  38. data/ext/libsvm/svm-toy/gtk/callbacks.cpp +423 -0
  39. data/ext/libsvm/svm-toy/gtk/callbacks.h +54 -0
  40. data/ext/libsvm/svm-toy/gtk/interface.c +164 -0
  41. data/ext/libsvm/svm-toy/gtk/interface.h +14 -0
  42. data/ext/libsvm/svm-toy/gtk/main.c +23 -0
  43. data/ext/libsvm/svm-toy/gtk/svm-toy.glade +238 -0
  44. data/ext/libsvm/svm-toy/qt/Makefile +17 -0
  45. data/ext/libsvm/svm-toy/qt/svm-toy.cpp +413 -0
  46. data/ext/libsvm/svm-toy/windows/svm-toy.cpp +456 -0
  47. data/ext/libsvm/svm-train.c +376 -0
  48. data/ext/libsvm/svm.cpp +3060 -0
  49. data/ext/libsvm/svm.def +19 -0
  50. data/ext/libsvm/svm.h +105 -0
  51. data/ext/libsvm/svm.o +0 -0
  52. data/ext/libsvm/tools/README +149 -0
  53. data/ext/libsvm/tools/checkdata.py +108 -0
  54. data/ext/libsvm/tools/easy.py +79 -0
  55. data/ext/libsvm/tools/grid.py +359 -0
  56. data/ext/libsvm/tools/subset.py +146 -0
  57. data/ext/libsvm/windows/libsvm.dll +0 -0
  58. data/ext/libsvm/windows/svm-predict.exe +0 -0
  59. data/ext/libsvm/windows/svm-scale.exe +0 -0
  60. data/ext/libsvm/windows/svm-toy.exe +0 -0
  61. data/ext/libsvm/windows/svm-train.exe +0 -0
  62. data/lib/eluka.rb +10 -0
  63. data/lib/eluka/bijection.rb +23 -0
  64. data/lib/eluka/data_point.rb +36 -0
  65. data/lib/eluka/document.rb +47 -0
  66. data/lib/eluka/feature_vector.rb +86 -0
  67. data/lib/eluka/features.rb +31 -0
  68. data/lib/eluka/model.rb +129 -0
  69. data/lib/fselect.rb +321 -0
  70. data/lib/grid.rb +25 -0
  71. data/test/helper.rb +18 -0
  72. data/test/test_eluka.rb +7 -0
  73. metadata +214 -0
@@ -0,0 +1,19 @@
+ LIBRARY libsvm
+ EXPORTS
+ svm_train @1
+ svm_cross_validation @2
+ svm_save_model @3
+ svm_load_model @4
+ svm_get_svm_type @5
+ svm_get_nr_class @6
+ svm_get_labels @7
+ svm_get_svr_probability @8
+ svm_predict_values @9
+ svm_predict @10
+ svm_predict_probability @11
+ svm_free_model_content @12
+ svm_free_and_destroy_model @13
+ svm_destroy_param @14
+ svm_check_parameter @15
+ svm_check_probability_model @16
+ svm_set_print_string_function @17
data/ext/libsvm/svm.h ADDED
@@ -0,0 +1,105 @@
+ #ifndef _LIBSVM_H
+ #define _LIBSVM_H
+
+ #define LIBSVM_VERSION 300
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ extern int libsvm_version;
+
+ struct svm_node
+ {
+     int index;
+     double value;
+ };
+
+ struct svm_problem
+ {
+     int l;
+     double *y;
+     struct svm_node **x;
+ };
+
+ enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR }; /* svm_type */
+ enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; /* kernel_type */
+
+ struct svm_parameter
+ {
+     int svm_type;
+     int kernel_type;
+     int degree; /* for poly */
+     double gamma; /* for poly/rbf/sigmoid */
+     double coef0; /* for poly/sigmoid */
+
+     /* these are for training only */
+     double cache_size; /* in MB */
+     double eps; /* stopping criteria */
+     double C; /* for C_SVC, EPSILON_SVR and NU_SVR */
+     int nr_weight; /* for C_SVC */
+     int *weight_label; /* for C_SVC */
+     double* weight; /* for C_SVC */
+     double nu; /* for NU_SVC, ONE_CLASS, and NU_SVR */
+     double p; /* for EPSILON_SVR */
+     int shrinking; /* use the shrinking heuristics */
+     int probability; /* do probability estimates */
+ };
+
+ //
+ // svm_model
+ //
+ struct svm_model
+ {
+     struct svm_parameter param; /* parameter */
+     int nr_class; /* number of classes, = 2 in regression/one class svm */
+     int l; /* total #SV */
+     struct svm_node **SV; /* SVs (SV[l]) */
+     double **sv_coef; /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
+     double *rho; /* constants in decision functions (rho[k*(k-1)/2]) */
+     double *probA; /* pariwise probability information */
+     double *probB;
+
+     /* for classification only */
+
+     int *label; /* label of each class (label[k]) */
+     int *nSV; /* number of SVs for each class (nSV[k]) */
+                 /* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
+     /* XXX */
+     int free_sv; /* 1 if svm_model is created by svm_load_model*/
+                 /* 0 if svm_model is created by svm_train */
+ };
+
+ struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param);
+ void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target);
+
+ int svm_save_model(const char *model_file_name, const struct svm_model *model);
+ struct svm_model *svm_load_model(const char *model_file_name);
+
+ int svm_get_svm_type(const struct svm_model *model);
+ int svm_get_nr_class(const struct svm_model *model);
+ void svm_get_labels(const struct svm_model *model, int *label);
+ double svm_get_svr_probability(const struct svm_model *model);
+
+ double svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values);
+ double svm_predict(const struct svm_model *model, const struct svm_node *x);
+ double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates);
+
+ void svm_free_model_content(struct svm_model *model_ptr);
+ void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr);
+ void svm_destroy_param(struct svm_parameter *param);
+
+ const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param);
+ int svm_check_probability_model(const struct svm_model *model);
+
+ void svm_set_print_string_function(void (*print_func)(const char *));
+
+ // deprecated
+ // this function will be removed in future release
+ void svm_destroy_model(struct svm_model *model_ptr);
+
+ #ifdef __cplusplus
+ }
+ #endif
+
+ #endif /* _LIBSVM_H */
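The header above is the complete C API the gem's extension builds against. As a rough, hypothetical sketch only (this caller is not part of the package), the usual call sequence, assuming svm.h is on the include path and the program is linked against the compiled svm.cpp, is: fill an svm_problem, set an svm_parameter, validate it with svm_check_parameter, train, predict, and free:

/* sketch.c -- illustrative caller, not shipped with the gem */
#include <stdio.h>
#include "svm.h"

int main(void)
{
    /* two 1-D training points; each sparse vector ends with index = -1 */
    struct svm_node x0[] = { {1, 0.0}, {-1, 0.0} };
    struct svm_node x1[] = { {1, 1.0}, {-1, 0.0} };
    struct svm_node *xs[] = { x0, x1 };
    double ys[] = { -1.0, +1.0 };
    struct svm_problem prob = { 2, ys, xs };      /* l, y, x */

    struct svm_parameter param = { 0 };           /* unused fields stay zero/NULL */
    param.svm_type    = C_SVC;
    param.kernel_type = LINEAR;
    param.C           = 1.0;
    param.cache_size  = 100;                      /* MB */
    param.eps         = 1e-3;
    param.shrinking   = 1;

    const char *msg = svm_check_parameter(&prob, &param);
    if (msg) { fprintf(stderr, "bad parameters: %s\n", msg); return 1; }

    struct svm_model *model = svm_train(&prob, &param);
    printf("prediction for x1: %g\n", svm_predict(model, x1));

    svm_free_and_destroy_model(&model);
    svm_destroy_param(&param);
    return 0;
}

The terminating {-1, 0.0} entries follow libsvm's sparse-vector convention: svm_predict and svm_train read svm_node arrays until they hit an index of -1.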
data/ext/libsvm/svm.o ADDED
Binary file
data/ext/libsvm/tools/README ADDED
@@ -0,0 +1,149 @@
+ This directory includes some useful codes:
+
+ 1. subset selection tools.
+ 2. parameter selection tools.
+ 3. LIBSVM format checking tools
+
+ Part I: Subset selection tools
+
+ Introduction
+ ============
+
+ Training large data is time consuming. Sometimes one should work on a
+ smaller subset first. The python script subset.py randomly selects a
+ specified number of samples. For classification data, we provide a
+ stratified selection to ensure the same class distribution in the
+ subset.
+
+ Usage: subset.py [options] dataset number [output1] [output2]
+
+ This script selects a subset of the given data set.
+
+ options:
+ -s method : method of selection (default 0)
+      0 -- stratified selection (classification only)
+      1 -- random selection
+
+ output1 : the subset (optional)
+ output2 : the rest of data (optional)
+
+ If output1 is omitted, the subset will be printed on the screen.
+
+ Example
+ =======
+
+ > python subset.py heart_scale 100 file1 file2
+
+ From heart_scale 100 samples are randomly selected and stored in
+ file1. All remaining instances are stored in file2.
+
+
+ Part II: Parameter Selection Tools
+
+ Introduction
+ ============
+
+ grid.py is a parameter selection tool for C-SVM classification using
+ the RBF (radial basis function) kernel. It uses cross validation (CV)
+ technique to estimate the accuracy of each parameter combination in
+ the specified range and helps you to decide the best parameters for
+ your problem.
+
+ grid.py directly executes libsvm binaries (so no python binding is needed)
+ for cross validation and then draw contour of CV accuracy using gnuplot.
+ You must have libsvm and gnuplot installed before using it. The package
+ gnuplot is available at http://www.gnuplot.info/
+
+ On Mac OSX, the precompiled gnuplot file needs the library Aquarterm,
+ which thus must be installed as well. In addition, this version of
+ gnuplot does not support png, so you need to change "set term png
+ transparent small" and use other image formats. For example, you may
+ have "set term pbm small color".
+
+ Usage: grid.py [-log2c begin,end,step] [-log2g begin,end,step] [-v fold]
+ [-svmtrain pathname] [-gnuplot pathname] [-out pathname] [-png pathname]
+ [additional parameters for svm-train] dataset
+
+ The program conducts v-fold cross validation using parameter C (and gamma)
+ = 2^begin, 2^(begin+step), ..., 2^end.
+
+ You can specify where the libsvm executable and gnuplot are using the
+ -svmtrain and -gnuplot parameters.
+
+ For windows users, please use pgnuplot.exe. If you are using gnuplot
+ 3.7.1, please upgrade to version 3.7.3 or higher. The version 3.7.1
+ has a bug. If you use cygwin on windows, please use gunplot-x11.
+
+ Example
+ =======
+
+ > python grid.py -log2c -5,5,1 -log2g -4,0,1 -v 5 -m 300 heart_scale
+
+ Users (in particular MS Windows users) may need to specify the path of
+ executable files. You can either change paths in the beginning of
+ grid.py or specify them in the command line. For example,
+
+ > grid.py -log2c -5,5,1 -svmtrain c:\libsvm\windows\svm-train.exe -gnuplot c:\tmp\gnuplot\bin\pgnuplot.exe -v 10 heart_scale
+
+ Output: two files
+ dataset.png: the CV accuracy contour plot generated by gnuplot
+ dataset.out: the CV accuracy at each (log2(C),log2(gamma))
+
+ Parallel grid search
+ ====================
+
+ You can conduct a parallel grid search by dispatching jobs to a
+ cluster of computers which share the same file system. First, you add
+ machine names in grid.py:
+
+ ssh_workers = ["linux1", "linux5", "linux5"]
+
+ and then setup your ssh so that the authentication works without
+ asking a password.
+
+ The same machine (e.g., linux5 here) can be listed more than once if
+ it has multiple CPUs or has more RAM. If the local machine is the
+ best, you can also enlarge the nr_local_worker. For example:
+
+ nr_local_worker = 2
+
+ Example:
+
+ > python grid.py heart_scale
+ [local] -1 -1 78.8889 (best c=0.5, g=0.5, rate=78.8889)
+ [linux5] -1 -7 83.3333 (best c=0.5, g=0.0078125, rate=83.3333)
+ [linux5] 5 -1 77.037 (best c=0.5, g=0.0078125, rate=83.3333)
+ [linux1] 5 -7 83.3333 (best c=0.5, g=0.0078125, rate=83.3333)
+ .
+ .
+ .
+
+ If -log2c, -log2g, or -v is not specified, default values are used.
+
+ If your system uses telnet instead of ssh, you list the computer names
+ in telnet_workers.
+
+ Part III: LIBSVM format checking tools
+
+ Introduction
+ ============
+
+ `svm-train' conducts only a simple check of the input data. To do a
+ detailed check, we provide a python script `checkdata.py.'
+
+ Usage: checkdata.py dataset
+
+ Exit status (returned value): 1 if there are errors, 0 otherwise.
+
+ This tool is written by Rong-En Fan at National Taiwan University.
+
+ Example
+ =======
+
+ > cat bad_data
+ 1 3:1 2:4
+ > python checkdata.py bad_data
+ line 1: feature indices must be in an ascending order, previous/current features 3:1 2:4
+ Found 1 lines with error.
+
+
data/ext/libsvm/tools/checkdata.py ADDED
@@ -0,0 +1,108 @@
+ #!/usr/bin/env python
+
+ #
+ # A format checker for LIBSVM
+ #
+
+ #
+ # Copyright (c) 2007, Rong-En Fan
+ #
+ # All rights reserved.
+ #
+ # This program is distributed under the same license of the LIBSVM package.
+ #
+
+ from sys import argv, exit
+ import os.path
+
+ def err(line_no, msg):
+     print("line %d: %s" % (line_no, msg))
+
+ # works like float() but does not accept nan and inf
+ def my_float(x):
+     if x.lower().find("nan") != -1 or x.lower().find("inf") != -1:
+         raise ValueError
+
+     return float(x)
+
+ def main():
+     if len(argv) != 2:
+         print("Usage: %s dataset" % (argv[0]))
+         exit(1)
+
+     dataset = argv[1]
+
+     if not os.path.exists(dataset):
+         print("dataset %s not found" % (dataset))
+         exit(1)
+
+     line_no = 1
+     error_line_count = 0
+     for line in open(dataset, 'r'):
+         line_error = False
+
+         # each line must end with a newline character
+         if line[-1] != '\n':
+             err(line_no, "missing a newline character in the end")
+             line_error = True
+
+         nodes = line.split()
+
+         # check label
+         try:
+             label = nodes.pop(0)
+
+             if label.find(',') != -1:
+                 # multi-label format
+                 try:
+                     for l in label.split(','):
+                         l = my_float(l)
+                 except:
+                     err(line_no, "label %s is not a valid multi-label form" % label)
+                     line_error = True
+             else:
+                 try:
+                     label = my_float(label)
+                 except:
+                     err(line_no, "label %s is not a number" % label)
+                     line_error = True
+         except:
+             err(line_no, "missing label, perhaps an empty line?")
+             line_error = True
+
+         # check features
+         prev_index = -1
+         for i in range(len(nodes)):
+             try:
+                 (index, value) = nodes[i].split(':')
+
+                 index = int(index)
+                 value = my_float(value)
+
+                 # precomputed kernel's index starts from 0 and LIBSVM
+                 # checks it. Hence, don't treat index 0 as an error.
+                 if index < 0:
+                     err(line_no, "feature index must be positive; wrong feature %s" % nodes[i])
+                     line_error = True
+                 elif index < prev_index:
+                     err(line_no, "feature indices must be in an ascending order, previous/current features %s %s" % (nodes[i-1], nodes[i]))
+                     line_error = True
+                 prev_index = index
+             except:
+                 err(line_no, "feature '%s' not an <index>:<value> pair, <index> integer, <value> real number " % nodes[i])
+                 line_error = True
+
+         line_no += 1
+
+         if line_error:
+             error_line_count += 1
+
+     if error_line_count > 0:
+         print("Found %d lines with error." % (error_line_count))
+         return 1
+     else:
+         print("No error.")
+         return 0
+
+ if __name__ == "__main__":
+     exit(main())
data/ext/libsvm/tools/easy.py ADDED
@@ -0,0 +1,79 @@
+ #!/usr/bin/env python
+
+ import sys
+ import os
+ from subprocess import *
+
+ if len(sys.argv) <= 1:
+     print('Usage: %s training_file [testing_file]' % sys.argv[0])
+     raise SystemExit
+
+ # svm, grid, and gnuplot executable files
+
+ is_win32 = (sys.platform == 'win32')
+ if not is_win32:
+     svmscale_exe = "../svm-scale"
+     svmtrain_exe = "../svm-train"
+     svmpredict_exe = "../svm-predict"
+     grid_py = "./grid.py"
+     gnuplot_exe = "/usr/bin/gnuplot"
+ else:
+     # example for windows
+     svmscale_exe = r"..\windows\svm-scale.exe"
+     svmtrain_exe = r"..\windows\svm-train.exe"
+     svmpredict_exe = r"..\windows\svm-predict.exe"
+     gnuplot_exe = r"c:\tmp\gnuplot\bin\pgnuplot.exe"
+     grid_py = r".\grid.py"
+
+ assert os.path.exists(svmscale_exe),"svm-scale executable not found"
+ assert os.path.exists(svmtrain_exe),"svm-train executable not found"
+ assert os.path.exists(svmpredict_exe),"svm-predict executable not found"
+ assert os.path.exists(gnuplot_exe),"gnuplot executable not found"
+ assert os.path.exists(grid_py),"grid.py not found"
+
+ train_pathname = sys.argv[1]
+ assert os.path.exists(train_pathname),"training file not found"
+ file_name = os.path.split(train_pathname)[1]
+ scaled_file = file_name + ".scale"
+ model_file = file_name + ".model"
+ range_file = file_name + ".range"
+
+ if len(sys.argv) > 2:
+     test_pathname = sys.argv[2]
+     file_name = os.path.split(test_pathname)[1]
+     assert os.path.exists(test_pathname),"testing file not found"
+     scaled_test_file = file_name + ".scale"
+     predict_test_file = file_name + ".predict"
+
+ cmd = '%s -s "%s" "%s" > "%s"' % (svmscale_exe, range_file, train_pathname, scaled_file)
+ print('Scaling training data...')
+ Popen(cmd, shell = True, stdout = PIPE).communicate()
+
+ cmd = '%s -svmtrain "%s" -gnuplot "%s" "%s"' % (grid_py, svmtrain_exe, gnuplot_exe, scaled_file)
+ print('Cross validation...')
+ f = Popen(cmd, shell = True, stdout = PIPE).stdout
+
+ line = ''
+ while True:
+     last_line = line
+     line = f.readline()
+     if not line: break
+ c,g,rate = map(float,last_line.split())
+
+ print('Best c=%s, g=%s CV rate=%s' % (c,g,rate))
+
+ cmd = '%s -c %s -g %s "%s" "%s"' % (svmtrain_exe,c,g,scaled_file,model_file)
+ print('Training...')
+ Popen(cmd, shell = True, stdout = PIPE).communicate()
+
+ print('Output model: %s' % model_file)
+ if len(sys.argv) > 2:
+     cmd = '%s -r "%s" "%s" > "%s"' % (svmscale_exe, range_file, test_pathname, scaled_test_file)
+     print('Scaling testing data...')
+     Popen(cmd, shell = True, stdout = PIPE).communicate()
+
+     cmd = '%s "%s" "%s" "%s"' % (svmpredict_exe, scaled_test_file, model_file, predict_test_file)
+     print('Testing...')
+     Popen(cmd, shell = True).communicate()
+
+     print('Output prediction: %s' % predict_test_file)
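As the usage string and the relative paths at the top of the script indicate, easy.py is intended to be run from this tools directory after the svm-scale, svm-train and svm-predict binaries have been built one level up, with gnuplot available for grid.py. A typical invocation, using the bundled heart_scale data set as in the README examples above, would be along the lines of:

> python easy.py ../heart_scale

which scales the data, runs grid.py to pick C and gamma by cross validation, trains a model, and writes heart_scale.model (and, when a second test file is passed, a .predict file named after that test file).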