flock 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +79 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/examples/example.rb +39 -0
- data/ext/cluster.c +4598 -0
- data/ext/cluster.h +93 -0
- data/ext/extconf.rb +5 -0
- data/ext/flock.c +118 -0
- metadata +72 -0
data/ext/cluster.h
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
/******************************************************************************/
|
2
|
+
/* The C Clustering Library.
|
3
|
+
* Copyright (C) 2002 Michiel Jan Laurens de Hoon.
|
4
|
+
*
|
5
|
+
* This library was written at the Laboratory of DNA Information Analysis,
|
6
|
+
* Human Genome Center, Institute of Medical Science, University of Tokyo,
|
7
|
+
* 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan.
|
8
|
+
* Contact: mdehoon 'AT' gsc.riken.jp
|
9
|
+
*
|
10
|
+
* Permission to use, copy, modify, and distribute this software and its
|
11
|
+
* documentation with or without modifications and for any purpose and
|
12
|
+
* without fee is hereby granted, provided that any copyright notices
|
13
|
+
* appear in all copies and that both those copyright notices and this
|
14
|
+
* permission notice appear in supporting documentation, and that the
|
15
|
+
* names of the contributors or copyright holders not be used in
|
16
|
+
* advertising or publicity pertaining to distribution of the software
|
17
|
+
* without specific prior permission.
|
18
|
+
*
|
19
|
+
* THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
|
20
|
+
* WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
|
21
|
+
* WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
|
22
|
+
* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
|
23
|
+
* OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
24
|
+
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
25
|
+
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
26
|
+
* OR PERFORMANCE OF THIS SOFTWARE.
|
27
|
+
*
|
28
|
+
*/
|
29
|
+
|
30
|
+
#ifndef min
|
31
|
+
#define min(x, y) ((x) < (y) ? (x) : (y))
|
32
|
+
#endif
|
33
|
+
#ifndef max
|
34
|
+
#define max(x, y) ((x) > (y) ? (x) : (y))
|
35
|
+
#endif
|
36
|
+
|
37
|
+
#ifdef WINDOWS
|
38
|
+
# include <windows.h>
|
39
|
+
#endif
|
40
|
+
|
41
|
+
#define CLUSTERVERSION "1.50"
|
42
|
+
|
43
|
+
/* Chapter 2 */
|
44
|
+
double clusterdistance (int nrows, int ncolumns, double** data, int** mask,
|
45
|
+
double weight[], int n1, int n2, int index1[], int index2[], char dist,
|
46
|
+
char method, int transpose);
|
47
|
+
double** distancematrix (int ngenes, int ndata, double** data,
|
48
|
+
int** mask, double* weight, char dist, int transpose);
|
49
|
+
|
50
|
+
/* Chapter 3 */
|
51
|
+
int getclustercentroids(int nclusters, int nrows, int ncolumns,
|
52
|
+
double** data, int** mask, int clusterid[], double** cdata, int** cmask,
|
53
|
+
int transpose, char method);
|
54
|
+
void getclustermedoids(int nclusters, int nelements, double** distance,
|
55
|
+
int clusterid[], int centroids[], double errors[]);
|
56
|
+
void kcluster (int nclusters, int ngenes, int ndata, double** data,
|
57
|
+
int** mask, double weight[], int transpose, int npass, char method, char dist,
|
58
|
+
int clusterid[], double* error, int* ifound);
|
59
|
+
void kmedoids (int nclusters, int nelements, double** distance,
|
60
|
+
int npass, int clusterid[], double* error, int* ifound);
|
61
|
+
|
62
|
+
/* Chapter 4 */
|
63
|
+
/* One join in a hierarchical-clustering tree; see the description that follows. */
typedef struct {
    int left;        /* first element or subnode joined in this node */
    int right;       /* second element or subnode joined in this node */
    double distance; /* distance between the two subnodes that were joined */
} Node;
|
64
|
+
/*
|
65
|
+
* A Node struct describes a single node in a tree created by hierarchical
|
66
|
+
* clustering. The tree can be represented by an array of n Node structs,
|
67
|
+
* where n is the number of elements minus one. The integers left and right
|
68
|
+
* in each Node struct refer to the two elements or subnodes that are joined
|
69
|
+
* in this node. The original elements are numbered 0..nelements-1, and the
|
70
|
+
* nodes -1..-(nelements-1). For each node, distance contains the distance
|
71
|
+
* between the two subnodes that were joined.
|
72
|
+
*/
|
73
|
+
|
74
|
+
Node* treecluster (int nrows, int ncolumns, double** data, int** mask,
|
75
|
+
double weight[], int transpose, char dist, char method, double** distmatrix);
|
76
|
+
void cuttree (int nelements, Node* tree, int nclusters, int clusterid[]);
|
77
|
+
|
78
|
+
/* Chapter 5 */
|
79
|
+
void somcluster (int nrows, int ncolumns, double** data, int** mask,
|
80
|
+
const double weight[], int transpose, int nxnodes, int nynodes,
|
81
|
+
double inittau, int niter, char dist, double*** celldata,
|
82
|
+
int clusterid[][2]);
|
83
|
+
|
84
|
+
/* Chapter 6 */
|
85
|
+
int pca(int m, int n, double** u, double** v, double* w);
|
86
|
+
|
87
|
+
/* Utility routines, currently undocumented */
|
88
|
+
void sort(int n, const double data[], int index[]);
|
89
|
+
double mean(int n, double x[]);
|
90
|
+
double median (int n, double x[]);
|
91
|
+
|
92
|
+
double* calculate_weights(int nrows, int ncolumns, double** data, int** mask,
|
93
|
+
double weights[], int transpose, char dist, double cutoff, double exponent);
|
data/ext/extconf.rb
ADDED
data/ext/flock.c
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
#include <ruby/ruby.h>
|
2
|
+
#include "cluster.h"
|
3
|
+
|
4
|
+
#define ID_CONST_GET rb_intern("const_get")
|
5
|
+
#define CONST_GET(scope, constant) (rb_funcall(scope, ID_CONST_GET, 1, rb_str_new2(constant)))
|
6
|
+
|
7
|
+
static VALUE mFlock;
|
8
|
+
|
9
|
+
/*
 * Read an integer option from an optional Ruby options hash.
 *
 * option - Ruby Hash of options, or nil when the caller supplied none.
 * key    - option name; it is looked up in the hash as a Symbol.
 * def    - value returned when option is nil or the key is absent / nil.
 *
 * Returns the stored value converted with NUM2INT, or def.
 * Raises TypeError when option is neither nil nor a Hash, and whatever
 * NUM2INT raises when the stored value cannot be converted to an int.
 */
int opt_int_value(VALUE option, const char *key, int def) {
    if (NIL_P(option)) return def;

    /* rb_hash_aref does not type-check its receiver; reject non-Hash
     * arguments here instead of handing it an arbitrary object. */
    Check_Type(option, T_HASH);

    VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
    return NIL_P(value) ? def : NUM2INT(value);
}
|
15
|
+
|
16
|
+
VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
|
17
|
+
VALUE size, data, mask, weights, options;
|
18
|
+
rb_scan_args(argc, argv, "31", &size, &data, &mask, &options);
|
19
|
+
|
20
|
+
int i,j;
|
21
|
+
int nrows = RARRAY_LEN(data);
|
22
|
+
int ncols = RARRAY_LEN(rb_ary_entry(data, 0));
|
23
|
+
|
24
|
+
double **cdata = (double**)malloc(sizeof(double*)*nrows);
|
25
|
+
int **cmask = (int **)malloc(sizeof(int *)*nrows);
|
26
|
+
double **ccentroid = (double**)malloc(sizeof(double*)*nrows);
|
27
|
+
int **ccentroid_mask = (int **)malloc(sizeof(int *)*nrows);
|
28
|
+
double *cweights = (double *)malloc(sizeof(double )*ncols);
|
29
|
+
int *ccluster = (int *)malloc(sizeof(int )*nrows);
|
30
|
+
|
31
|
+
for (i = 0; i < nrows; i++) {
|
32
|
+
cdata[i] = (double*)malloc(sizeof(double)*ncols);
|
33
|
+
cmask[i] = (int *)malloc(sizeof(int )*ncols);
|
34
|
+
ccentroid[i] = (double*)malloc(sizeof(double)*ncols);
|
35
|
+
ccentroid_mask[i] = (int *)malloc(sizeof(int )*ncols);
|
36
|
+
for (j = 0; j < ncols; j++) {
|
37
|
+
cdata[i][j] = NUM2DBL(rb_ary_entry(rb_ary_entry(data, i), j));
|
38
|
+
cmask[i][j] = NUM2INT(rb_ary_entry(rb_ary_entry(mask, i), j));
|
39
|
+
}
|
40
|
+
}
|
41
|
+
|
42
|
+
weights = NIL_P(options) ? Qnil : rb_hash_aref(options, ID2SYM(rb_intern("weights")));
|
43
|
+
for (i = 0; i < ncols; i++) {
|
44
|
+
cweights[i] = NIL_P(weights) ? 1.0 : NUM2DBL(rb_ary_entry(weights, i));
|
45
|
+
}
|
46
|
+
|
47
|
+
int transpose = opt_int_value(options, "transpose", 0);
|
48
|
+
int npass = opt_int_value(options, "iterations", 1000);
|
49
|
+
// a = average, m = means
|
50
|
+
int method = opt_int_value(options, "method", 'a');
|
51
|
+
// e = euclidian,
|
52
|
+
// b = city-block distance
|
53
|
+
// c = correlation
|
54
|
+
// a = absolute value of the correlation
|
55
|
+
// u = uncentered correlation
|
56
|
+
// x = absolute uncentered correlation
|
57
|
+
// s = spearman's rank correlation
|
58
|
+
// k = kendall's tau
|
59
|
+
int dist = opt_int_value(options, "metric", 'e');
|
60
|
+
|
61
|
+
int ifound;
|
62
|
+
double error;
|
63
|
+
kcluster(NUM2INT(size),
|
64
|
+
nrows, ncols, cdata, cmask, cweights, transpose, npass, method, dist, ccluster, &error, &ifound);
|
65
|
+
|
66
|
+
getclustercentroids(NUM2INT(size),
|
67
|
+
nrows, ncols, cdata, cmask, ccluster, ccentroid, ccentroid_mask, transpose, method);
|
68
|
+
|
69
|
+
VALUE result = rb_hash_new();
|
70
|
+
VALUE cluster = rb_ary_new();
|
71
|
+
VALUE centroid = rb_ary_new();
|
72
|
+
|
73
|
+
for (i = 0; i < nrows; i++) {
|
74
|
+
rb_ary_push(cluster, INT2NUM(ccluster[i]));
|
75
|
+
VALUE point = rb_ary_new();
|
76
|
+
for (j = 0; j < ncols; j++)
|
77
|
+
rb_ary_push(point, DBL2NUM(ccentroid[i][j]));
|
78
|
+
rb_ary_push(centroid, point);
|
79
|
+
}
|
80
|
+
|
81
|
+
rb_hash_aset(result, ID2SYM(rb_intern("cluster")), cluster);
|
82
|
+
rb_hash_aset(result, ID2SYM(rb_intern("centroid")), centroid);
|
83
|
+
rb_hash_aset(result, ID2SYM(rb_intern("error")), DBL2NUM(error));
|
84
|
+
rb_hash_aset(result, ID2SYM(rb_intern("repeated")), INT2NUM(ifound));
|
85
|
+
|
86
|
+
for (i = 0; i < nrows; i++) {
|
87
|
+
free(cdata[i]);
|
88
|
+
free(cmask[i]);
|
89
|
+
free(ccentroid[i]);
|
90
|
+
free(ccentroid_mask[i]);
|
91
|
+
}
|
92
|
+
|
93
|
+
free(cdata);
|
94
|
+
free(cmask);
|
95
|
+
free(ccentroid);
|
96
|
+
free(ccentroid_mask);
|
97
|
+
free(cweights);
|
98
|
+
free(ccluster);
|
99
|
+
|
100
|
+
return result;
|
101
|
+
}
|
102
|
+
|
103
|
+
void Init_flock(void) {
|
104
|
+
mFlock = rb_define_module("Flock");
|
105
|
+
rb_define_module_function(mFlock, "kmeans", RUBY_METHOD_FUNC(rb_kmeans), -1);
|
106
|
+
|
107
|
+
rb_define_const(mFlock, "METHOD_AVERAGE", INT2NUM('a'));
|
108
|
+
rb_define_const(mFlock, "METHOD_MEDIAN", INT2NUM('m'));
|
109
|
+
|
110
|
+
rb_define_const(mFlock, "METRIC_EUCLIDIAN", INT2NUM('e'));
|
111
|
+
rb_define_const(mFlock, "METRIC_CITY_BLOCK", INT2NUM('b'));
|
112
|
+
rb_define_const(mFlock, "METRIC_CORRELATION", INT2NUM('c'));
|
113
|
+
rb_define_const(mFlock, "METRIC_ABSOLUTE_CORRELATION", INT2NUM('a'));
|
114
|
+
rb_define_const(mFlock, "METRIC_UNCENTERED_CORRELATION", INT2NUM('u'));
|
115
|
+
rb_define_const(mFlock, "METRIC_ABSOLUTE_UNCENTERED_CORRELATION", INT2NUM('x'));
|
116
|
+
rb_define_const(mFlock, "METRIC_SPEARMAN", INT2NUM('s'));
|
117
|
+
rb_define_const(mFlock, "METRIC_KENDALL", INT2NUM('k'));
|
118
|
+
}
|
metadata
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: flock
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
version: 0.1.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Bharanee Rathna
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-02-18 00:00:00 +11:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: A thin ruby binding to Cluster 3.0
|
22
|
+
email:
|
23
|
+
- deepfryed@gmail.com
|
24
|
+
executables: []
|
25
|
+
|
26
|
+
extensions:
|
27
|
+
- ext/extconf.rb
|
28
|
+
extra_rdoc_files:
|
29
|
+
- README.rdoc
|
30
|
+
files:
|
31
|
+
- README.rdoc
|
32
|
+
- Rakefile
|
33
|
+
- VERSION
|
34
|
+
- ext/cluster.c
|
35
|
+
- ext/cluster.h
|
36
|
+
- ext/extconf.rb
|
37
|
+
- ext/flock.c
|
38
|
+
- examples/example.rb
|
39
|
+
has_rdoc: true
|
40
|
+
homepage: http://github.com/deepfryed/flock
|
41
|
+
licenses: []
|
42
|
+
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options:
|
45
|
+
- --charset=UTF-8
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
segments:
|
54
|
+
- 0
|
55
|
+
version: "0"
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
segments:
|
62
|
+
- 0
|
63
|
+
version: "0"
|
64
|
+
requirements: []
|
65
|
+
|
66
|
+
rubyforge_project:
|
67
|
+
rubygems_version: 1.3.7
|
68
|
+
signing_key:
|
69
|
+
specification_version: 3
|
70
|
+
summary: Ruby bindings to Cluster 3.0.
|
71
|
+
test_files:
|
72
|
+
- examples/example.rb
|