flock 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/cluster.h ADDED
@@ -0,0 +1,93 @@
1
+ /******************************************************************************/
2
+ /* The C Clustering Library.
3
+ * Copyright (C) 2002 Michiel Jan Laurens de Hoon.
4
+ *
5
+ * This library was written at the Laboratory of DNA Information Analysis,
6
+ * Human Genome Center, Institute of Medical Science, University of Tokyo,
7
+ * 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan.
8
+ * Contact: mdehoon 'AT' gsc.riken.jp
9
+ *
10
+ * Permission to use, copy, modify, and distribute this software and its
11
+ * documentation with or without modifications and for any purpose and
12
+ * without fee is hereby granted, provided that any copyright notices
13
+ * appear in all copies and that both those copyright notices and this
14
+ * permission notice appear in supporting documentation, and that the
15
+ * names of the contributors or copyright holders not be used in
16
+ * advertising or publicity pertaining to distribution of the software
17
+ * without specific prior permission.
18
+ *
19
+ * THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
20
+ * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
21
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
22
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
23
+ * OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
24
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
25
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
26
+ * OR PERFORMANCE OF THIS SOFTWARE.
27
+ *
28
+ */
29
+
30
/*
 * cluster.h -- declarations for the C Clustering Library routines that are
 * implemented in cluster.c.
 *
 * The include guard makes the header safe to include more than once; without
 * it a second inclusion would redefine the anonymous-struct typedef Node.
 */
#ifndef FLOCK_CLUSTER_H
#define FLOCK_CLUSTER_H

/*
 * Two-argument minimum / maximum, defined only when the environment has not
 * already provided them.  Each argument may be evaluated twice, so do not
 * pass expressions with side effects.
 */
#ifndef min
#define min(x, y) ((x) < (y) ? (x) : (y))
#endif
#ifndef max
#define max(x, y) ((x) > (y) ? (x) : (y))
#endif

#ifdef WINDOWS
#  include <windows.h>
#endif

/* Version string of the bundled clustering library. */
#define CLUSTERVERSION "1.50"

/*
 * NOTE(review): the "Chapter N" headings below presumably follow the chapter
 * numbering of the C Clustering Library manual -- confirm.  The exact contract
 * of every routine (array dimensions, ownership of returned memory, meaning of
 * the single-character `dist` / `method` codes) is defined by cluster.c and
 * that manual, not by this header.
 */

/* Chapter 2 */
/* Distance between two groups of items selected by index1[0..n1-1] and
 * index2[0..n2-1] -- presumably; verify against cluster.c. */
double clusterdistance (int nrows, int ncolumns, double** data, int** mask,
  double weight[], int n1, int n2, int index1[], int index2[], char dist,
  char method, int transpose);
/* Returns a newly built distance matrix.  NOTE(review): who frees the result,
 * and its shape, must be confirmed against cluster.c. */
double** distancematrix (int ngenes, int ndata, double** data,
  int** mask, double* weight, char dist, int transpose);

/* Chapter 3 */
/* Computes the centroid of each of `nclusters` clusters into cdata / cmask,
 * given the assignment in clusterid.  flock.c in this package passes 'a' or
 * 'm' as `method`.  The int result is a status code -- TODO confirm its
 * convention in cluster.c. */
int getclustercentroids(int nclusters, int nrows, int ncolumns,
  double** data, int** mask, int clusterid[], double** cdata, int** cmask,
  int transpose, char method);
/* Medoid counterpart of getclustercentroids working on a distance matrix --
 * presumably; verify against cluster.c. */
void getclustermedoids(int nclusters, int nelements, double** distance,
  int clusterid[], int centroids[], double errors[]);
/* k-means style clustering.  Writes the assignment to clusterid and reports
 * through *error and *ifound.  flock.c passes 'a'/'m' for `method` and one of
 * 'e', 'b', 'c', 'a', 'u', 'x', 's', 'k' for `dist`. */
void kcluster (int nclusters, int ngenes, int ndata, double** data,
  int** mask, double weight[], int transpose, int npass, char method, char dist,
  int clusterid[], double* error, int* ifound);
/* k-medoids clustering on a precomputed distance matrix -- presumably; verify
 * against cluster.c. */
void kmedoids (int nclusters, int nelements, double** distance,
  int npass, int clusterid[], double* error, int* ifound);

/* Chapter 4 */
typedef struct {int left; int right; double distance;} Node;
/*
 * A Node struct describes a single node in a tree created by hierarchical
 * clustering. The tree can be represented by an array of n Node structs,
 * where n is the number of elements minus one. The integers left and right
 * in each Node struct refer to the two elements or subnodes that are joined
 * in this node. The original elements are numbered 0..nelements-1, and the
 * nodes -1..-(nelements-1). For each node, distance contains the distance
 * between the two subnodes that were joined.
 */

/* Builds a hierarchical clustering tree (an array of Node, see above).
 * NOTE(review): ownership of the returned array and the role of distmatrix
 * must be confirmed against cluster.c. */
Node* treecluster (int nrows, int ncolumns, double** data, int** mask,
  double weight[], int transpose, char dist, char method, double** distmatrix);
/* Derives `nclusters` cluster ids from a tree produced by treecluster. */
void cuttree (int nelements, Node* tree, int nclusters, int clusterid[]);

/* Chapter 5 */
/* Self-organizing-map clustering on an nxnodes-by-nynodes grid; clusterid
 * receives a pair of ints per item. */
void somcluster (int nrows, int ncolumns, double** data, int** mask,
  const double weight[], int transpose, int nxnodes, int nynodes,
  double inittau, int niter, char dist, double*** celldata,
  int clusterid[][2]);

/* Chapter 6 */
/* Principal component analysis; the int result is a status code -- TODO
 * confirm its convention in cluster.c. */
int pca(int m, int n, double** u, double** v, double* w);

/* Utility routines, currently undocumented */
void sort(int n, const double data[], int index[]);
double mean(int n, double x[]);
double median (int n, double x[]);

double* calculate_weights(int nrows, int ncolumns, double** data, int** mask,
  double weights[], int transpose, char dist, double cutoff, double exponent);

#endif /* FLOCK_CLUSTER_H */
data/ext/extconf.rb ADDED
@@ -0,0 +1,5 @@
1
#!/usr/bin/ruby

# Generates the Makefile that builds the "flock" native extension.
require 'mkmf'

# Append to the flags mkmf derived from the Ruby build configuration instead
# of replacing them, so the interpreter's own compiler settings are kept.
# String interpolation (rather than <<) also copes with a nil or frozen value.
$CFLAGS = "#{$CFLAGS} -fPIC -Os -Wall".strip

create_makefile('flock')
data/ext/flock.c ADDED
@@ -0,0 +1,118 @@
1
+ #include <ruby/ruby.h>
2
+ #include "cluster.h"
3
+
4
+ #define ID_CONST_GET rb_intern("const_get") /* ID of the Ruby method name "const_get"; rb_intern is re-evaluated at every expansion */
5
+ #define CONST_GET(scope, constant) (rb_funcall(scope, ID_CONST_GET, 1, rb_str_new2(constant))) /* calls scope.const_get(constant), passing the C string `constant` as a Ruby String; not referenced by the code visible in this file */
6
+
7
+ static VALUE mFlock;
8
+
9
+ int opt_int_value(VALUE option, char *key, int def) {
10
+ if (NIL_P(option)) return def;
11
+
12
+ VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
13
+ return NIL_P(value) ? def : NUM2INT(value);
14
+ }
15
+
16
+ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
17
+ VALUE size, data, mask, weights, options;
18
+ rb_scan_args(argc, argv, "31", &size, &data, &mask, &options);
19
+
20
+ int i,j;
21
+ int nrows = RARRAY_LEN(data);
22
+ int ncols = RARRAY_LEN(rb_ary_entry(data, 0));
23
+
24
+ double **cdata = (double**)malloc(sizeof(double*)*nrows);
25
+ int **cmask = (int **)malloc(sizeof(int *)*nrows);
26
+ double **ccentroid = (double**)malloc(sizeof(double*)*nrows);
27
+ int **ccentroid_mask = (int **)malloc(sizeof(int *)*nrows);
28
+ double *cweights = (double *)malloc(sizeof(double )*ncols);
29
+ int *ccluster = (int *)malloc(sizeof(int )*nrows);
30
+
31
+ for (i = 0; i < nrows; i++) {
32
+ cdata[i] = (double*)malloc(sizeof(double)*ncols);
33
+ cmask[i] = (int *)malloc(sizeof(int )*ncols);
34
+ ccentroid[i] = (double*)malloc(sizeof(double)*ncols);
35
+ ccentroid_mask[i] = (int *)malloc(sizeof(int )*ncols);
36
+ for (j = 0; j < ncols; j++) {
37
+ cdata[i][j] = NUM2DBL(rb_ary_entry(rb_ary_entry(data, i), j));
38
+ cmask[i][j] = NUM2INT(rb_ary_entry(rb_ary_entry(mask, i), j));
39
+ }
40
+ }
41
+
42
+ weights = NIL_P(options) ? Qnil : rb_hash_aref(options, ID2SYM(rb_intern("weights")));
43
+ for (i = 0; i < ncols; i++) {
44
+ cweights[i] = NIL_P(weights) ? 1.0 : NUM2DBL(rb_ary_entry(weights, i));
45
+ }
46
+
47
+ int transpose = opt_int_value(options, "transpose", 0);
48
+ int npass = opt_int_value(options, "iterations", 1000);
49
+ // a = average, m = means
50
+ int method = opt_int_value(options, "method", 'a');
51
+ // e = euclidian,
52
+ // b = city-block distance
53
+ // c = correlation
54
+ // a = absolute value of the correlation
55
+ // u = uncentered correlation
56
+ // x = absolute uncentered correlation
57
+ // s = spearman's rank correlation
58
+ // k = kendall's tau
59
+ int dist = opt_int_value(options, "metric", 'e');
60
+
61
+ int ifound;
62
+ double error;
63
+ kcluster(NUM2INT(size),
64
+ nrows, ncols, cdata, cmask, cweights, transpose, npass, method, dist, ccluster, &error, &ifound);
65
+
66
+ getclustercentroids(NUM2INT(size),
67
+ nrows, ncols, cdata, cmask, ccluster, ccentroid, ccentroid_mask, transpose, method);
68
+
69
+ VALUE result = rb_hash_new();
70
+ VALUE cluster = rb_ary_new();
71
+ VALUE centroid = rb_ary_new();
72
+
73
+ for (i = 0; i < nrows; i++) {
74
+ rb_ary_push(cluster, INT2NUM(ccluster[i]));
75
+ VALUE point = rb_ary_new();
76
+ for (j = 0; j < ncols; j++)
77
+ rb_ary_push(point, DBL2NUM(ccentroid[i][j]));
78
+ rb_ary_push(centroid, point);
79
+ }
80
+
81
+ rb_hash_aset(result, ID2SYM(rb_intern("cluster")), cluster);
82
+ rb_hash_aset(result, ID2SYM(rb_intern("centroid")), centroid);
83
+ rb_hash_aset(result, ID2SYM(rb_intern("error")), DBL2NUM(error));
84
+ rb_hash_aset(result, ID2SYM(rb_intern("repeated")), INT2NUM(ifound));
85
+
86
+ for (i = 0; i < nrows; i++) {
87
+ free(cdata[i]);
88
+ free(cmask[i]);
89
+ free(ccentroid[i]);
90
+ free(ccentroid_mask[i]);
91
+ }
92
+
93
+ free(cdata);
94
+ free(cmask);
95
+ free(ccentroid);
96
+ free(ccentroid_mask);
97
+ free(cweights);
98
+ free(ccluster);
99
+
100
+ return result;
101
+ }
102
+
103
+ void Init_flock(void) {
104
+ mFlock = rb_define_module("Flock");
105
+ rb_define_module_function(mFlock, "kmeans", RUBY_METHOD_FUNC(rb_kmeans), -1);
106
+
107
+ rb_define_const(mFlock, "METHOD_AVERAGE", INT2NUM('a'));
108
+ rb_define_const(mFlock, "METHOD_MEDIAN", INT2NUM('m'));
109
+
110
+ rb_define_const(mFlock, "METRIC_EUCLIDIAN", INT2NUM('e'));
111
+ rb_define_const(mFlock, "METRIC_CITY_BLOCK", INT2NUM('b'));
112
+ rb_define_const(mFlock, "METRIC_CORRELATION", INT2NUM('c'));
113
+ rb_define_const(mFlock, "METRIC_ABSOLUTE_CORRELATION", INT2NUM('a'));
114
+ rb_define_const(mFlock, "METRIC_UNCENTERED_CORRELATION", INT2NUM('u'));
115
+ rb_define_const(mFlock, "METRIC_ABSOLUTE_UNCENTERED_CORRELATION", INT2NUM('x'));
116
+ rb_define_const(mFlock, "METRIC_SPEARMAN", INT2NUM('s'));
117
+ rb_define_const(mFlock, "METRIC_KENDALL", INT2NUM('k'));
118
+ }
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: flock
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 0
9
+ version: 0.1.0
10
+ platform: ruby
11
+ authors:
12
+ - Bharanee Rathna
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2011-02-18 00:00:00 +11:00
18
+ default_executable:
19
+ dependencies: []
20
+
21
+ description: A thin ruby binding to Cluster 3.0
22
+ email:
23
+ - deepfryed@gmail.com
24
+ executables: []
25
+
26
+ extensions:
27
+ - ext/extconf.rb
28
+ extra_rdoc_files:
29
+ - README.rdoc
30
+ files:
31
+ - README.rdoc
32
+ - Rakefile
33
+ - VERSION
34
+ - ext/cluster.c
35
+ - ext/cluster.h
36
+ - ext/extconf.rb
37
+ - ext/flock.c
38
+ - examples/example.rb
39
+ has_rdoc: true
40
+ homepage: http://github.com/deepfryed/flock
41
+ licenses: []
42
+
43
+ post_install_message:
44
+ rdoc_options:
45
+ - --charset=UTF-8
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ segments:
54
+ - 0
55
+ version: "0"
56
+ required_rubygems_version: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ segments:
62
+ - 0
63
+ version: "0"
64
+ requirements: []
65
+
66
+ rubyforge_project:
67
+ rubygems_version: 1.3.7
68
+ signing_key:
69
+ specification_version: 3
70
+ summary: Ruby bindings to Cluster 3.0.
71
+ test_files:
72
+ - examples/example.rb