flock 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/ext/cluster.h ADDED
@@ -0,0 +1,93 @@
1
+ /******************************************************************************/
2
+ /* The C Clustering Library.
3
+ * Copyright (C) 2002 Michiel Jan Laurens de Hoon.
4
+ *
5
+ * This library was written at the Laboratory of DNA Information Analysis,
6
+ * Human Genome Center, Institute of Medical Science, University of Tokyo,
7
+ * 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan.
8
+ * Contact: mdehoon 'AT' gsc.riken.jp
9
+ *
10
+ * Permission to use, copy, modify, and distribute this software and its
11
+ * documentation with or without modifications and for any purpose and
12
+ * without fee is hereby granted, provided that any copyright notices
13
+ * appear in all copies and that both those copyright notices and this
14
+ * permission notice appear in supporting documentation, and that the
15
+ * names of the contributors or copyright holders not be used in
16
+ * advertising or publicity pertaining to distribution of the software
17
+ * without specific prior permission.
18
+ *
19
+ * THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
20
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
21
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
22
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
23
+ * OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
24
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
25
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
26
+ * OR PERFORMANCE OF THIS SOFTWARE.
27
+ *
28
+ */
29
+
30
/*
 * Include guard: this header defines the Node typedef, which cannot be
 * repeated, so a second inclusion in the same translation unit must be a
 * no-op.
 */
#ifndef CLUSTER_H
#define CLUSTER_H

/*
 * Fallback min/max, defined only when nothing else has provided them.
 * Both are plain macros, so each argument may be evaluated twice; do not
 * pass expressions with side effects (e.g. min(i++, j)).
 */
#ifndef min
#define min(x, y) ((x) < (y) ? (x) : (y))
#endif
#ifndef max
#define max(x, y) ((x) > (y) ? (x) : (y))
#endif

#ifdef WINDOWS
#  include <windows.h>
#endif

/* Version string of the C Clustering Library this header belongs to. */
#define CLUSTERVERSION "1.50"

/* Chapter 2 */
double clusterdistance (int nrows, int ncolumns, double** data, int** mask,
  double weight[], int n1, int n2, int index1[], int index2[], char dist,
  char method, int transpose);
double** distancematrix (int ngenes, int ndata, double** data,
  int** mask, double* weight, char dist, int transpose);

/* Chapter 3 */
int getclustercentroids(int nclusters, int nrows, int ncolumns,
  double** data, int** mask, int clusterid[], double** cdata, int** cmask,
  int transpose, char method);
void getclustermedoids(int nclusters, int nelements, double** distance,
  int clusterid[], int centroids[], double errors[]);
void kcluster (int nclusters, int ngenes, int ndata, double** data,
  int** mask, double weight[], int transpose, int npass, char method, char dist,
  int clusterid[], double* error, int* ifound);
void kmedoids (int nclusters, int nelements, double** distance,
  int npass, int clusterid[], double* error, int* ifound);

/* Chapter 4 */

/*
 * A Node struct describes a single node in a tree created by hierarchical
 * clustering. The tree can be represented by an array of n Node structs,
 * where n is the number of elements minus one. The integers left and right
 * in each Node struct refer to the two elements or subnodes that are joined
 * in this node. The original elements are numbered 0..nelements-1, and the
 * nodes -1..-(nelements-1). For each node, distance contains the distance
 * between the two subnodes that were joined.
 */
typedef struct {int left; int right; double distance;} Node;

Node* treecluster (int nrows, int ncolumns, double** data, int** mask,
  double weight[], int transpose, char dist, char method, double** distmatrix);
void cuttree (int nelements, Node* tree, int nclusters, int clusterid[]);

/* Chapter 5 */
void somcluster (int nrows, int ncolumns, double** data, int** mask,
  const double weight[], int transpose, int nxnodes, int nynodes,
  double inittau, int niter, char dist, double*** celldata,
  int clusterid[][2]);

/* Chapter 6 */
int pca(int m, int n, double** u, double** v, double* w);

/* Utility routines, currently undocumented */
void sort(int n, const double data[], int index[]);
double mean(int n, double x[]);
double median (int n, double x[]);

double* calculate_weights(int nrows, int ncolumns, double** data, int** mask,
  double weights[], int transpose, char dist, double cutoff, double exponent);

#endif /* CLUSTER_H */
data/ext/extconf.rb ADDED
@@ -0,0 +1,5 @@
1
#!/usr/bin/ruby

# Generates the Makefile that builds the `flock` C extension.

require 'mkmf'

# Append our flags instead of assigning over $CFLAGS: mkmf pre-populates it
# with the flags the running Ruby was built with, and replacing those can
# break the build on platforms that depend on them.
$CFLAGS = "#{$CFLAGS} -fPIC -Os -Wall"

create_makefile('flock')
data/ext/flock.c ADDED
@@ -0,0 +1,118 @@
1
+ #include <ruby/ruby.h>
2
+ #include "cluster.h"
3
+
4
+ #define ID_CONST_GET rb_intern("const_get")
5
+ #define CONST_GET(scope, constant) (rb_funcall(scope, ID_CONST_GET, 1, rb_str_new2(constant)))
6
+
7
+ static VALUE mFlock;
8
+
9
+ int opt_int_value(VALUE option, char *key, int def) {
10
+ if (NIL_P(option)) return def;
11
+
12
+ VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
13
+ return NIL_P(value) ? def : NUM2INT(value);
14
+ }
15
+
16
+ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
17
+ VALUE size, data, mask, weights, options;
18
+ rb_scan_args(argc, argv, "31", &size, &data, &mask, &options);
19
+
20
+ int i,j;
21
+ int nrows = RARRAY_LEN(data);
22
+ int ncols = RARRAY_LEN(rb_ary_entry(data, 0));
23
+
24
+ double **cdata = (double**)malloc(sizeof(double*)*nrows);
25
+ int **cmask = (int **)malloc(sizeof(int *)*nrows);
26
+ double **ccentroid = (double**)malloc(sizeof(double*)*nrows);
27
+ int **ccentroid_mask = (int **)malloc(sizeof(int *)*nrows);
28
+ double *cweights = (double *)malloc(sizeof(double )*ncols);
29
+ int *ccluster = (int *)malloc(sizeof(int )*nrows);
30
+
31
+ for (i = 0; i < nrows; i++) {
32
+ cdata[i] = (double*)malloc(sizeof(double)*ncols);
33
+ cmask[i] = (int *)malloc(sizeof(int )*ncols);
34
+ ccentroid[i] = (double*)malloc(sizeof(double)*ncols);
35
+ ccentroid_mask[i] = (int *)malloc(sizeof(int )*ncols);
36
+ for (j = 0; j < ncols; j++) {
37
+ cdata[i][j] = NUM2DBL(rb_ary_entry(rb_ary_entry(data, i), j));
38
+ cmask[i][j] = NUM2INT(rb_ary_entry(rb_ary_entry(mask, i), j));
39
+ }
40
+ }
41
+
42
+ weights = NIL_P(options) ? Qnil : rb_hash_aref(options, ID2SYM(rb_intern("weights")));
43
+ for (i = 0; i < ncols; i++) {
44
+ cweights[i] = NIL_P(weights) ? 1.0 : NUM2DBL(rb_ary_entry(weights, i));
45
+ }
46
+
47
+ int transpose = opt_int_value(options, "transpose", 0);
48
+ int npass = opt_int_value(options, "iterations", 1000);
49
+ // a = average, m = means
50
+ int method = opt_int_value(options, "method", 'a');
51
+ // e = euclidian,
52
+ // b = city-block distance
53
+ // c = correlation
54
+ // a = absolute value of the correlation
55
+ // u = uncentered correlation
56
+ // x = absolute uncentered correlation
57
+ // s = spearman's rank correlation
58
+ // k = kendall's tau
59
+ int dist = opt_int_value(options, "metric", 'e');
60
+
61
+ int ifound;
62
+ double error;
63
+ kcluster(NUM2INT(size),
64
+ nrows, ncols, cdata, cmask, cweights, transpose, npass, method, dist, ccluster, &error, &ifound);
65
+
66
+ getclustercentroids(NUM2INT(size),
67
+ nrows, ncols, cdata, cmask, ccluster, ccentroid, ccentroid_mask, transpose, method);
68
+
69
+ VALUE result = rb_hash_new();
70
+ VALUE cluster = rb_ary_new();
71
+ VALUE centroid = rb_ary_new();
72
+
73
+ for (i = 0; i < nrows; i++) {
74
+ rb_ary_push(cluster, INT2NUM(ccluster[i]));
75
+ VALUE point = rb_ary_new();
76
+ for (j = 0; j < ncols; j++)
77
+ rb_ary_push(point, DBL2NUM(ccentroid[i][j]));
78
+ rb_ary_push(centroid, point);
79
+ }
80
+
81
+ rb_hash_aset(result, ID2SYM(rb_intern("cluster")), cluster);
82
+ rb_hash_aset(result, ID2SYM(rb_intern("centroid")), centroid);
83
+ rb_hash_aset(result, ID2SYM(rb_intern("error")), DBL2NUM(error));
84
+ rb_hash_aset(result, ID2SYM(rb_intern("repeated")), INT2NUM(ifound));
85
+
86
+ for (i = 0; i < nrows; i++) {
87
+ free(cdata[i]);
88
+ free(cmask[i]);
89
+ free(ccentroid[i]);
90
+ free(ccentroid_mask[i]);
91
+ }
92
+
93
+ free(cdata);
94
+ free(cmask);
95
+ free(ccentroid);
96
+ free(ccentroid_mask);
97
+ free(cweights);
98
+ free(ccluster);
99
+
100
+ return result;
101
+ }
102
+
103
+ void Init_flock(void) {
104
+ mFlock = rb_define_module("Flock");
105
+ rb_define_module_function(mFlock, "kmeans", RUBY_METHOD_FUNC(rb_kmeans), -1);
106
+
107
+ rb_define_const(mFlock, "METHOD_AVERAGE", INT2NUM('a'));
108
+ rb_define_const(mFlock, "METHOD_MEDIAN", INT2NUM('m'));
109
+
110
+ rb_define_const(mFlock, "METRIC_EUCLIDIAN", INT2NUM('e'));
111
+ rb_define_const(mFlock, "METRIC_CITY_BLOCK", INT2NUM('b'));
112
+ rb_define_const(mFlock, "METRIC_CORRELATION", INT2NUM('c'));
113
+ rb_define_const(mFlock, "METRIC_ABSOLUTE_CORRELATION", INT2NUM('a'));
114
+ rb_define_const(mFlock, "METRIC_UNCENTERED_CORRELATION", INT2NUM('u'));
115
+ rb_define_const(mFlock, "METRIC_ABSOLUTE_UNCENTERED_CORRELATION", INT2NUM('x'));
116
+ rb_define_const(mFlock, "METRIC_SPEARMAN", INT2NUM('s'));
117
+ rb_define_const(mFlock, "METRIC_KENDALL", INT2NUM('k'));
118
+ }
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: flock
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 0
9
+ version: 0.1.0
10
+ platform: ruby
11
+ authors:
12
+ - Bharanee Rathna
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-02-18 00:00:00 +11:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description: A thin ruby binding to Cluster 3.0
22
+ email:
23
+ - deepfryed@gmail.com
24
+ executables: []
25
+
26
+ extensions:
27
+ - ext/extconf.rb
28
+ extra_rdoc_files:
29
+ - README.rdoc
30
+ files:
31
+ - README.rdoc
32
+ - Rakefile
33
+ - VERSION
34
+ - ext/cluster.c
35
+ - ext/cluster.h
36
+ - ext/extconf.rb
37
+ - ext/flock.c
38
+ - examples/example.rb
39
+ has_rdoc: true
40
+ homepage: http://github.com/deepfryed/flock
41
+ licenses: []
42
+
43
+ post_install_message:
44
+ rdoc_options:
45
+ - --charset=UTF-8
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ segments:
54
+ - 0
55
+ version: "0"
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ segments:
62
+ - 0
63
+ version: "0"
64
+ requirements: []
65
+
66
+ rubyforge_project:
67
+ rubygems_version: 1.3.7
68
+ signing_key:
69
+ specification_version: 3
70
+ summary: Ruby bindings to Cluster 3.0.
71
+ test_files:
72
+ - examples/example.rb