flock 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +79 -0
- data/Rakefile +18 -0
- data/VERSION +1 -0
- data/examples/example.rb +39 -0
- data/ext/cluster.c +4598 -0
- data/ext/cluster.h +93 -0
- data/ext/extconf.rb +5 -0
- data/ext/flock.c +118 -0
- metadata +72 -0
data/ext/cluster.h
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
/******************************************************************************/
|
2
|
+
/* The C Clustering Library.
|
3
|
+
* Copyright (C) 2002 Michiel Jan Laurens de Hoon.
|
4
|
+
*
|
5
|
+
* This library was written at the Laboratory of DNA Information Analysis,
|
6
|
+
* Human Genome Center, Institute of Medical Science, University of Tokyo,
|
7
|
+
* 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan.
|
8
|
+
* Contact: mdehoon 'AT' gsc.riken.jp
|
9
|
+
*
|
10
|
+
* Permission to use, copy, modify, and distribute this software and its
|
11
|
+
* documentation with or without modifications and for any purpose and
|
12
|
+
* without fee is hereby granted, provided that any copyright notices
|
13
|
+
* appear in all copies and that both those copyright notices and this
|
14
|
+
* permission notice appear in supporting documentation, and that the
|
15
|
+
* names of the contributors or copyright holders not be used in
|
16
|
+
* advertising or publicity pertaining to distribution of the software
|
17
|
+
* without specific prior permission.
|
18
|
+
*
|
19
|
+
* THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
|
20
|
+
* WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
|
21
|
+
* WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
|
22
|
+
* CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
|
23
|
+
* OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
24
|
+
* OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
25
|
+
* OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
26
|
+
* OR PERFORMANCE OF THIS SOFTWARE.
|
27
|
+
*
|
28
|
+
*/
|
29
|
+
|
30
|
+
/*
 * Include guard. This header defines Node as a typedef of an anonymous
 * struct, so seeing it twice in one translation unit would be a
 * redefinition error.
 */
#ifndef CLUSTER_H
#define CLUSTER_H

/*
 * min/max are only defined when the including code has not already supplied
 * them. Both are plain macros: each argument is expanded twice, so do not
 * pass expressions with side effects (e.g. min(i++, j)).
 */
#ifndef min
#define min(x, y) ((x) < (y) ? (x) : (y))
#endif
#ifndef max
#define max(x, y) ((x) > (y) ? (x) : (y))
#endif

#ifdef WINDOWS
#  include <windows.h>
#endif

#define CLUSTERVERSION "1.50"

/* Chapter 2 */
double clusterdistance (int nrows, int ncolumns, double** data, int** mask,
  double weight[], int n1, int n2, int index1[], int index2[], char dist,
  char method, int transpose);
double** distancematrix (int ngenes, int ndata, double** data,
  int** mask, double* weight, char dist, int transpose);

/* Chapter 3 */
int getclustercentroids(int nclusters, int nrows, int ncolumns,
  double** data, int** mask, int clusterid[], double** cdata, int** cmask,
  int transpose, char method);
void getclustermedoids(int nclusters, int nelements, double** distance,
  int clusterid[], int centroids[], double errors[]);
void kcluster (int nclusters, int ngenes, int ndata, double** data,
  int** mask, double weight[], int transpose, int npass, char method, char dist,
  int clusterid[], double* error, int* ifound);
void kmedoids (int nclusters, int nelements, double** distance,
  int npass, int clusterid[], double* error, int* ifound);

/* Chapter 4 */
typedef struct {int left; int right; double distance;} Node;
/*
 * A Node struct describes a single node in a tree created by hierarchical
 * clustering. The tree can be represented by an array of n Node structs,
 * where n is the number of elements minus one. The integers left and right
 * in each Node struct refer to the two elements or subnodes that are joined
 * in this node. The original elements are numbered 0..nelements-1, and the
 * nodes -1..-(nelements-1). For each node, distance contains the distance
 * between the two subnodes that were joined.
 */

Node* treecluster (int nrows, int ncolumns, double** data, int** mask,
  double weight[], int transpose, char dist, char method, double** distmatrix);
void cuttree (int nelements, Node* tree, int nclusters, int clusterid[]);

/* Chapter 5 */
void somcluster (int nrows, int ncolumns, double** data, int** mask,
  const double weight[], int transpose, int nxnodes, int nynodes,
  double inittau, int niter, char dist, double*** celldata,
  int clusterid[][2]);

/* Chapter 6 */
int pca(int m, int n, double** u, double** v, double* w);

/* Utility routines, currently undocumented */
void sort(int n, const double data[], int index[]);
double mean(int n, double x[]);
double median (int n, double x[]);

double* calculate_weights(int nrows, int ncolumns, double** data, int** mask,
  double weights[], int transpose, char dist, double cutoff, double exponent);

#endif /* CLUSTER_H */
|
data/ext/extconf.rb
ADDED
data/ext/flock.c
ADDED
@@ -0,0 +1,118 @@
|
|
1
|
+
#include <ruby/ruby.h>
|
2
|
+
#include "cluster.h"
|
3
|
+
|
4
|
+
#define ID_CONST_GET rb_intern("const_get")
|
5
|
+
#define CONST_GET(scope, constant) (rb_funcall(scope, ID_CONST_GET, 1, rb_str_new2(constant)))
|
6
|
+
|
7
|
+
static VALUE mFlock;
|
8
|
+
|
9
|
+
/*
 * Look up an optional integer setting in a Ruby options Hash.
 *
 * option - Ruby Hash keyed by Symbols, or nil when no options were given.
 * key    - option name; it is interned and looked up as a Symbol.
 * def    - value returned when option is nil or the key is absent/nil.
 *
 * Returns the stored value converted with NUM2INT, or def.
 * Raises TypeError when option is neither nil nor a Hash, or (via NUM2INT)
 * when the stored value cannot be converted to an integer.
 */
int opt_int_value(VALUE option, const char *key, int def) {
    VALUE value;

    if (NIL_P(option)) {
        return def;
    }

    /* rb_hash_aref assumes a real Hash; reject anything else up front. */
    Check_Type(option, T_HASH);

    value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
    return NIL_P(value) ? def : NUM2INT(value);
}
|
15
|
+
|
16
|
+
/*
 * Scratch state for one Flock.kmeans call. It lives on the C stack of
 * rb_kmeans and is handed to kmeans_run / kmeans_release through rb_ensure,
 * so the native buffers are released even when a Ruby exception is raised
 * half-way through the call.
 */
typedef struct {
    VALUE size;              /* requested number of clusters */
    VALUE data;              /* Ruby Array of rows of numbers */
    VALUE mask;              /* Ruby Array of rows of integers, same shape as data */
    VALUE options;           /* options Hash, or nil */
    int nrows;               /* number of rows in data */
    int ncols;               /* number of columns (length of the first row) */
    int crows;               /* rows allocated in ccentroid / ccentroid_mask */
    double **cdata;          /* nrows x ncols copy of data */
    int **cmask;             /* nrows x ncols copy of mask */
    double **ccentroid;      /* crows x ccols centroid matrix */
    int **ccentroid_mask;    /* crows x ccols centroid mask */
    double *cweights;        /* one weight per dimension */
    int *ccluster;           /* one cluster id per clustered element */
} kmeans_state;

/*
 * rb_ensure cleanup handler: frees every buffer recorded in the state.
 * Pointer tables are allocated with calloc, so rows that were never
 * allocated are NULL and free() ignores them.
 */
static VALUE kmeans_release(VALUE arg) {
    kmeans_state *st = (kmeans_state *)arg;
    int i;

    for (i = 0; i < st->nrows; i++) {
        if (st->cdata) free(st->cdata[i]);
        if (st->cmask) free(st->cmask[i]);
    }
    for (i = 0; i < st->crows; i++) {
        if (st->ccentroid) free(st->ccentroid[i]);
        if (st->ccentroid_mask) free(st->ccentroid_mask[i]);
    }

    free(st->cdata);
    free(st->cmask);
    free(st->ccentroid);
    free(st->ccentroid_mask);
    free(st->cweights);
    free(st->ccluster);

    return Qnil;
}

/*
 * rb_ensure body: copies the Ruby input into native buffers, runs kcluster
 * and getclustercentroids, and builds the result Hash. Anything raised here
 * unwinds through kmeans_release.
 */
static VALUE kmeans_run(VALUE arg) {
    kmeans_state *st = (kmeans_state *)arg;
    VALUE options = st->options;
    VALUE weights, result, cluster, centroid;
    int nrows = st->nrows;
    int ncols = st->ncols;
    int i, j;

    int nclusters = NUM2INT(st->size);
    int transpose = opt_int_value(options, "transpose", 0);
    int npass     = opt_int_value(options, "iterations", 1000);
    /* a = average, m = median */
    int method    = opt_int_value(options, "method", 'a');
    /*
     * e = euclidean
     * b = city-block distance
     * c = correlation
     * a = absolute value of the correlation
     * u = uncentered correlation
     * x = absolute uncentered correlation
     * s = spearman's rank correlation
     * k = kendall's tau
     */
    int dist      = opt_int_value(options, "metric", 'e');

    /*
     * The clustering library clusters rows when transpose == 0 and columns
     * otherwise; weights, cluster ids and the centroid matrix are sized
     * accordingly (centroids are nclusters x ncols, or nrows x nclusters
     * when transposed).
     */
    int nelements = transpose ? ncols : nrows;
    int ndims     = transpose ? nrows : ncols;
    int crows     = transpose ? nrows : nclusters;
    int ccols     = transpose ? nclusters : ncols;

    int ifound = 0;
    double error = 0.0;

    if (nclusters < 1 || nclusters > nelements) {
        rb_raise(rb_eArgError, "cluster count must be between 1 and %d, got %d",
                 nelements, nclusters);
    }

    weights = NIL_P(options) ? Qnil : rb_hash_aref(options, ID2SYM(rb_intern("weights")));
    if (!NIL_P(weights)) {
        Check_Type(weights, T_ARRAY);
    }

    /* Record crows first so kmeans_release knows how many rows to walk. */
    st->crows          = crows;
    st->cdata          = calloc((size_t)nrows, sizeof *st->cdata);
    st->cmask          = calloc((size_t)nrows, sizeof *st->cmask);
    st->ccentroid      = calloc((size_t)crows, sizeof *st->ccentroid);
    st->ccentroid_mask = calloc((size_t)crows, sizeof *st->ccentroid_mask);
    st->cweights       = calloc((size_t)ndims, sizeof *st->cweights);
    st->ccluster       = calloc((size_t)nelements, sizeof *st->ccluster);
    if (!st->cdata || !st->cmask || !st->ccentroid || !st->ccentroid_mask ||
        !st->cweights || !st->ccluster) {
        rb_raise(rb_eNoMemError, "failed to allocate k-means buffers");
    }

    for (i = 0; i < nrows; i++) {
        VALUE drow = rb_ary_entry(st->data, i);
        VALUE mrow = rb_ary_entry(st->mask, i);

        /* A missing mask row comes back as nil and is rejected here. */
        Check_Type(drow, T_ARRAY);
        Check_Type(mrow, T_ARRAY);

        st->cdata[i] = calloc((size_t)ncols, sizeof **st->cdata);
        st->cmask[i] = calloc((size_t)ncols, sizeof **st->cmask);
        if (!st->cdata[i] || !st->cmask[i]) {
            rb_raise(rb_eNoMemError, "failed to allocate k-means buffers");
        }

        /* Short rows yield nil entries, which NUM2DBL/NUM2INT reject. */
        for (j = 0; j < ncols; j++) {
            st->cdata[i][j] = NUM2DBL(rb_ary_entry(drow, j));
            st->cmask[i][j] = NUM2INT(rb_ary_entry(mrow, j));
        }
    }

    for (i = 0; i < crows; i++) {
        st->ccentroid[i]      = calloc((size_t)ccols, sizeof **st->ccentroid);
        st->ccentroid_mask[i] = calloc((size_t)ccols, sizeof **st->ccentroid_mask);
        if (!st->ccentroid[i] || !st->ccentroid_mask[i]) {
            rb_raise(rb_eNoMemError, "failed to allocate k-means buffers");
        }
    }

    for (i = 0; i < ndims; i++) {
        st->cweights[i] = NIL_P(weights) ? 1.0 : NUM2DBL(rb_ary_entry(weights, i));
    }

    kcluster(nclusters, nrows, ncols, st->cdata, st->cmask, st->cweights,
             transpose, npass, (char)method, (char)dist,
             st->ccluster, &error, &ifound);

    /* A zero return means the centroids were not computed. */
    if (!getclustercentroids(nclusters, nrows, ncols, st->cdata, st->cmask,
                             st->ccluster, st->ccentroid, st->ccentroid_mask,
                             transpose, (char)method)) {
        rb_raise(rb_eRuntimeError,
                 "could not compute cluster centroids (out of memory or unsupported :method)");
    }

    result   = rb_hash_new();
    cluster  = rb_ary_new();
    centroid = rb_ary_new();

    for (i = 0; i < nelements; i++) {
        rb_ary_push(cluster, INT2NUM(st->ccluster[i]));
    }

    /* Only the rows the library filled in are exported. */
    for (i = 0; i < crows; i++) {
        VALUE point = rb_ary_new();
        for (j = 0; j < ccols; j++) {
            rb_ary_push(point, DBL2NUM(st->ccentroid[i][j]));
        }
        rb_ary_push(centroid, point);
    }

    rb_hash_aset(result, ID2SYM(rb_intern("cluster")),  cluster);
    rb_hash_aset(result, ID2SYM(rb_intern("centroid")), centroid);
    rb_hash_aset(result, ID2SYM(rb_intern("error")),    DBL2NUM(error));
    rb_hash_aset(result, ID2SYM(rb_intern("repeated")), INT2NUM(ifound));

    return result;
}

/*
 * Flock.kmeans(size, data, mask, options = nil) -> Hash
 *
 * size    - number of clusters.
 * data    - Array of rows, each an Array of numbers; the first row fixes
 *           the column count.
 * mask    - Array of rows of integers with the same shape as data.
 * options - optional Hash with Symbol keys:
 *             :weights    Array of per-dimension weights (default 1.0 each)
 *             :transpose  non-zero to cluster columns instead of rows (default 0)
 *             :iterations number of passes handed to kcluster (default 1000)
 *             :method     centroid method code (default 'a')
 *             :metric     distance metric code (default 'e')
 *
 * Returns a Hash with :cluster (cluster id per clustered element),
 * :centroid (the centroid matrix as nested Arrays), :error and :repeated
 * (the error and ifound values reported by kcluster).
 *
 * Raises TypeError for arguments of the wrong type, ArgumentError for empty
 * data or an out-of-range cluster count, NoMemoryError when native buffers
 * cannot be allocated, and RuntimeError when centroids cannot be computed.
 */
VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
    kmeans_state st = {0};
    VALUE first;

    rb_scan_args(argc, argv, "31", &st.size, &st.data, &st.mask, &st.options);

    /* Validate shapes before touching RARRAY_LEN or allocating anything. */
    Check_Type(st.data, T_ARRAY);
    Check_Type(st.mask, T_ARRAY);
    if (!NIL_P(st.options)) {
        Check_Type(st.options, T_HASH);
    }
    if (RARRAY_LEN(st.data) == 0) {
        rb_raise(rb_eArgError, "data must contain at least one row");
    }

    first = rb_ary_entry(st.data, 0);
    Check_Type(first, T_ARRAY);
    if (RARRAY_LEN(first) == 0) {
        rb_raise(rb_eArgError, "data rows must contain at least one column");
    }

    st.nrows = (int)RARRAY_LEN(st.data);
    st.ncols = (int)RARRAY_LEN(first);

    return rb_ensure(kmeans_run, (VALUE)&st, kmeans_release, (VALUE)&st);
}
|
102
|
+
|
103
|
+
void Init_flock(void) {
|
104
|
+
mFlock = rb_define_module("Flock");
|
105
|
+
rb_define_module_function(mFlock, "kmeans", RUBY_METHOD_FUNC(rb_kmeans), -1);
|
106
|
+
|
107
|
+
rb_define_const(mFlock, "METHOD_AVERAGE", INT2NUM('a'));
|
108
|
+
rb_define_const(mFlock, "METHOD_MEDIAN", INT2NUM('m'));
|
109
|
+
|
110
|
+
rb_define_const(mFlock, "METRIC_EUCLIDIAN", INT2NUM('e'));
|
111
|
+
rb_define_const(mFlock, "METRIC_CITY_BLOCK", INT2NUM('b'));
|
112
|
+
rb_define_const(mFlock, "METRIC_CORRELATION", INT2NUM('c'));
|
113
|
+
rb_define_const(mFlock, "METRIC_ABSOLUTE_CORRELATION", INT2NUM('a'));
|
114
|
+
rb_define_const(mFlock, "METRIC_UNCENTERED_CORRELATION", INT2NUM('u'));
|
115
|
+
rb_define_const(mFlock, "METRIC_ABSOLUTE_UNCENTERED_CORRELATION", INT2NUM('x'));
|
116
|
+
rb_define_const(mFlock, "METRIC_SPEARMAN", INT2NUM('s'));
|
117
|
+
rb_define_const(mFlock, "METRIC_KENDALL", INT2NUM('k'));
|
118
|
+
}
|
metadata
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: flock
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
version: 0.1.0
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Bharanee Rathna
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2011-02-18 00:00:00 +11:00
|
18
|
+
default_executable:
|
19
|
+
dependencies: []
|
20
|
+
|
21
|
+
description: A thin ruby binding to Cluster 3.0
|
22
|
+
email:
|
23
|
+
- deepfryed@gmail.com
|
24
|
+
executables: []
|
25
|
+
|
26
|
+
extensions:
|
27
|
+
- ext/extconf.rb
|
28
|
+
extra_rdoc_files:
|
29
|
+
- README.rdoc
|
30
|
+
files:
|
31
|
+
- README.rdoc
|
32
|
+
- Rakefile
|
33
|
+
- VERSION
|
34
|
+
- ext/cluster.c
|
35
|
+
- ext/cluster.h
|
36
|
+
- ext/extconf.rb
|
37
|
+
- ext/flock.c
|
38
|
+
- examples/example.rb
|
39
|
+
has_rdoc: true
|
40
|
+
homepage: http://github.com/deepfryed/flock
|
41
|
+
licenses: []
|
42
|
+
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options:
|
45
|
+
- --charset=UTF-8
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
segments:
|
54
|
+
- 0
|
55
|
+
version: "0"
|
56
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
segments:
|
62
|
+
- 0
|
63
|
+
version: "0"
|
64
|
+
requirements: []
|
65
|
+
|
66
|
+
rubyforge_project:
|
67
|
+
rubygems_version: 1.3.7
|
68
|
+
signing_key:
|
69
|
+
specification_version: 3
|
70
|
+
summary: Ruby bindings to Cluster 3.0.
|
71
|
+
test_files:
|
72
|
+
- examples/example.rb
|