flock 0.4.1 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/API.rdoc +22 -0
- data/README.rdoc +44 -14
- data/Rakefile +5 -0
- data/VERSION +1 -1
- data/ext/cluster.c +2993 -2680
- data/ext/cluster.h +1 -1
- data/ext/flock.c +235 -67
- data/ext/kmeanspp.c +129 -0
- data/flock.gemspec +13 -18
- data/lib/flock.rb +229 -32
- metadata +10 -15
- data/examples/dense.rb +0 -38
- data/examples/som.rb +0 -13
- data/examples/sparse.rb +0 -22
- data/examples/treecluster.rb +0 -13
data/ext/cluster.h
CHANGED
@@ -55,7 +55,7 @@ void getclustermedoids(int nclusters, int nelements, double** distance,
|
|
55
55
|
int clusterid[], int centroids[], double errors[]);
|
56
56
|
void kcluster (int nclusters, int ngenes, int ndata, double** data,
|
57
57
|
int** mask, double weight[], int transpose, int npass, char method, char dist,
|
58
|
-
int clusterid[], double* error, int* ifound);
|
58
|
+
int clusterid[], double* error, int* ifound, int assign);
|
59
59
|
void kmedoids (int nclusters, int nelements, double** distance,
|
60
60
|
int npass, int clusterid[], double* error, int* ifound);
|
61
61
|
|
data/ext/flock.c
CHANGED
@@ -3,41 +3,60 @@
|
|
3
3
|
|
4
4
|
#define ID_CONST_GET rb_intern("const_get")
|
5
5
|
#define CONST_GET(scope, constant) (rb_funcall(scope, ID_CONST_GET, 1, rb_str_new2(constant)))
|
6
|
+
#define DEFAULT_ITERATIONS 100
|
6
7
|
|
7
|
-
static VALUE mFlock;
|
8
|
+
static VALUE mFlock, scFlock;
|
8
9
|
typedef double (*distance_fn)(int, double**, double**, int**, int**, const double [], int, int, int);
|
9
10
|
|
10
|
-
int
|
11
|
-
|
11
|
+
int get_int_option(VALUE option, char *key, int default_value) {
|
12
|
+
if (NIL_P(option)) return default_value;
|
12
13
|
|
13
|
-
|
14
|
-
|
14
|
+
VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
|
15
|
+
return NIL_P(value) ? default_value : NUM2INT(value);
|
15
16
|
}
|
16
17
|
|
17
|
-
int
|
18
|
-
|
18
|
+
int get_bool_option(VALUE option, char *key, int default_value) {
|
19
|
+
if (NIL_P(option)) return default_value;
|
20
|
+
VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
|
21
|
+
return (TYPE(value) == T_FALSE || TYPE(value) == T_NIL) ? 0 : 1;
|
22
|
+
}
|
23
|
+
|
24
|
+
double get_dbl_option(VALUE option, char *key, double default_value) {
|
25
|
+
if (NIL_P(option)) return default_value;
|
19
26
|
|
20
|
-
|
21
|
-
|
27
|
+
VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
|
28
|
+
return NIL_P(value) ? default_value : NUM2DBL(value);
|
22
29
|
}
|
23
30
|
|
24
|
-
VALUE
|
31
|
+
VALUE get_value_option(VALUE option, char *key, VALUE default_value) {
|
32
|
+
if (NIL_P(option)) return default_value;
|
33
|
+
|
34
|
+
VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
|
35
|
+
return NIL_P(value) ? default_value : value;
|
36
|
+
}
|
37
|
+
|
38
|
+
/* @api private */
|
39
|
+
VALUE rb_do_kcluster(int argc, VALUE *argv, VALUE self) {
|
25
40
|
VALUE size, data, mask, weights, options;
|
26
|
-
rb_scan_args(argc, argv, "
|
41
|
+
rb_scan_args(argc, argv, "21", &size, &data, &options);
|
27
42
|
|
28
43
|
if (TYPE(data) != T_ARRAY)
|
29
44
|
rb_raise(rb_eArgError, "data should be an array of arrays");
|
30
45
|
|
46
|
+
if (NIL_P(size) || NUM2INT(rb_Integer(size)) > RARRAY_LEN(data))
|
47
|
+
rb_raise(rb_eArgError, "size should be > 0 and <= data size");
|
48
|
+
|
49
|
+
mask = get_value_option(options, "mask", Qnil);
|
50
|
+
|
31
51
|
if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
|
32
52
|
rb_raise(rb_eArgError, "mask should be an array of arrays");
|
33
53
|
|
34
|
-
|
35
|
-
|
54
|
+
int transpose = get_bool_option(options, "transpose", 0);
|
55
|
+
int npass = get_int_option(options, "iterations", DEFAULT_ITERATIONS);
|
36
56
|
|
37
|
-
int transpose = opt_int_value(options, "transpose", 0);
|
38
|
-
int npass = opt_int_value(options, "iterations", 1000);
|
39
57
|
// a = average (k-means), m = median (k-medians)
|
40
|
-
int method =
|
58
|
+
int method = get_int_option(options, "method", 'a');
|
59
|
+
|
41
60
|
// e = Euclidean distance
|
42
61
|
// b = city-block distance
|
43
62
|
// c = correlation
|
@@ -46,7 +65,10 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
|
|
46
65
|
// x = absolute uncentered correlation
|
47
66
|
// s = spearman's rank correlation
|
48
67
|
// k = kendall's tau
|
49
|
-
int dist =
|
68
|
+
int dist = get_int_option(options, "metric", 'e');
|
69
|
+
|
70
|
+
// initial cluster assignment strategy: one of the SEED_* constants (default 0 = SEED_RANDOM)
|
71
|
+
int assign = get_int_option(options, "seed", 0);
|
50
72
|
|
51
73
|
int i,j;
|
52
74
|
int nrows = RARRAY_LEN(data);
|
@@ -94,7 +116,7 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
|
|
94
116
|
double error;
|
95
117
|
|
96
118
|
kcluster(nsets,
|
97
|
-
nrows, ncols, cdata, cmask, cweights, transpose, npass, method, dist, ccluster, &error, &ifound);
|
119
|
+
nrows, ncols, cdata, cmask, cweights, transpose, npass, method, dist, ccluster, &error, &ifound, assign);
|
98
120
|
getclustercentroids(nsets,
|
99
121
|
nrows, ncols, cdata, cmask, ccluster, ccentroid, ccentroid_mask, transpose, method);
|
100
122
|
|
@@ -137,13 +159,16 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
|
|
137
159
|
return result;
|
138
160
|
}
|
139
161
|
|
140
|
-
|
162
|
+
/* @api private */
|
163
|
+
VALUE rb_do_self_organizing_map(int argc, VALUE *argv, VALUE self) {
|
141
164
|
VALUE nx, ny, data, mask, weights, options;
|
142
|
-
rb_scan_args(argc, argv, "
|
165
|
+
rb_scan_args(argc, argv, "31", &nx, &ny, &data, &options);
|
143
166
|
|
144
167
|
if (TYPE(data) != T_ARRAY)
|
145
168
|
rb_raise(rb_eArgError, "data should be an array of arrays");
|
146
169
|
|
170
|
+
mask = get_value_option(options, "mask", Qnil);
|
171
|
+
|
147
172
|
if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
|
148
173
|
rb_raise(rb_eArgError, "mask should be an array of arrays");
|
149
174
|
|
@@ -155,8 +180,8 @@ VALUE rb_som(int argc, VALUE *argv, VALUE self) {
|
|
155
180
|
|
156
181
|
int nxgrid = NUM2INT(rb_Integer(nx));
|
157
182
|
int nygrid = NUM2INT(rb_Integer(ny));
|
158
|
-
int transpose =
|
159
|
-
int npass =
|
183
|
+
int transpose = get_int_option(options, "transpose", 0);
|
184
|
+
int npass = get_int_option(options, "iterations", DEFAULT_ITERATIONS);
|
160
185
|
|
161
186
|
// e = Euclidean distance
|
162
187
|
// b = city-block distance
|
@@ -166,8 +191,8 @@ VALUE rb_som(int argc, VALUE *argv, VALUE self) {
|
|
166
191
|
// x = absolute uncentered correlation
|
167
192
|
// s = spearman's rank correlation
|
168
193
|
// k = kendall's tau
|
169
|
-
int dist =
|
170
|
-
double tau =
|
194
|
+
int dist = get_int_option(options, "metric", 'e');
|
195
|
+
double tau = get_dbl_option(options, "tau", 1.0);
|
171
196
|
|
172
197
|
int i, j, k;
|
173
198
|
int nrows = RARRAY_LEN(data);
|
@@ -211,8 +236,7 @@ VALUE rb_som(int argc, VALUE *argv, VALUE self) {
|
|
211
236
|
ccelldata[i][j] = (double *)malloc(sizeof(double)*dimy);
|
212
237
|
}
|
213
238
|
|
214
|
-
somcluster(nrows, ncols, cdata, cmask, cweights, transpose,
|
215
|
-
nxgrid, nygrid, tau, npass, dist, ccelldata, ccluster);
|
239
|
+
somcluster(nrows, ncols, cdata, cmask, cweights, transpose, nxgrid, nygrid, tau, npass, dist, ccelldata, ccluster);
|
216
240
|
|
217
241
|
VALUE result = rb_hash_new();
|
218
242
|
VALUE cluster = rb_ary_new();
|
@@ -260,22 +284,30 @@ VALUE rb_som(int argc, VALUE *argv, VALUE self) {
|
|
260
284
|
return result;
|
261
285
|
}
|
262
286
|
|
263
|
-
|
287
|
+
/* @api private */
|
288
|
+
VALUE rb_do_treecluster(int argc, VALUE *argv, VALUE self) {
|
264
289
|
VALUE size, data, mask, weights, options;
|
265
|
-
rb_scan_args(argc, argv, "
|
290
|
+
rb_scan_args(argc, argv, "21", &size, &data, &options);
|
266
291
|
|
267
292
|
if (TYPE(data) != T_ARRAY)
|
268
293
|
rb_raise(rb_eArgError, "data should be an array of arrays");
|
269
294
|
|
295
|
+
mask = get_value_option(options, "mask", Qnil);
|
296
|
+
|
270
297
|
if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
|
271
298
|
rb_raise(rb_eArgError, "mask should be an array of arrays");
|
272
299
|
|
273
300
|
if (NIL_P(size) || NUM2INT(rb_Integer(size)) > RARRAY_LEN(data))
|
274
301
|
rb_raise(rb_eArgError, "size should be > 0 and <= data size");
|
275
302
|
|
276
|
-
int transpose =
|
277
|
-
|
278
|
-
|
303
|
+
int transpose = get_int_option(options, "transpose", 0);
|
304
|
+
|
305
|
+
// s: pairwise single-linkage clustering
|
306
|
+
// m: pairwise maximum- (or complete-) linkage clustering
|
307
|
+
// a: pairwise average-linkage clustering
|
308
|
+
// c: pairwise centroid-linkage clustering
|
309
|
+
int method = get_int_option(options, "method", 'a');
|
310
|
+
|
279
311
|
// e = Euclidean distance
|
280
312
|
// b = city-block distance
|
281
313
|
// c = correlation
|
@@ -284,7 +316,7 @@ VALUE rb_treecluster(int argc, VALUE *argv, VALUE self) {
|
|
284
316
|
// x = absolute uncentered correlation
|
285
317
|
// s = spearman's rank correlation
|
286
318
|
// k = kendall's tau
|
287
|
-
int dist =
|
319
|
+
int dist = get_int_option(options, "metric", 'e');
|
288
320
|
|
289
321
|
int i,j;
|
290
322
|
int nrows = RARRAY_LEN(data);
|
@@ -346,15 +378,25 @@ VALUE rb_treecluster(int argc, VALUE *argv, VALUE self) {
|
|
346
378
|
if (tree)
|
347
379
|
free(tree);
|
348
380
|
else
|
349
|
-
rb_raise(rb_eNoMemError, "
|
381
|
+
rb_raise(rb_eNoMemError, "treecluster ran out of memory");
|
350
382
|
|
351
383
|
return result;
|
352
384
|
}
|
353
385
|
|
354
|
-
|
386
|
+
void inline copy_mask(VALUE src, int *dst, int size, int def) {
|
387
|
+
int i;
|
388
|
+
if (NIL_P(src))
|
389
|
+
for (i = 0; i < size; i++)
|
390
|
+
dst[i] = def;
|
391
|
+
else
|
392
|
+
for (i = 0; i < size; i++)
|
393
|
+
dst[i] = NUM2INT(rb_ary_entry(src, i));
|
394
|
+
}
|
395
|
+
|
396
|
+
VALUE rb_distance(VALUE vec1, VALUE m1, VALUE vec2, VALUE m2, distance_fn fn) {
|
355
397
|
uint32_t size;
|
356
398
|
double *data1, *data2, *weight, dist;
|
357
|
-
int *
|
399
|
+
int *mask1, *mask2, i;
|
358
400
|
|
359
401
|
if (TYPE(vec1) != T_ARRAY)
|
360
402
|
rb_raise(rb_eArgError, "vector1 should be an array");
|
@@ -373,17 +415,21 @@ VALUE rb_distance(VALUE vec1, VALUE vec2, distance_fn fn) {
|
|
373
415
|
data1 = (double *)malloc(sizeof(double)*size);
|
374
416
|
data2 = (double *)malloc(sizeof(double)*size);
|
375
417
|
weight = (double *)malloc(sizeof(double)*size);
|
376
|
-
|
418
|
+
mask1 = (int *)malloc(sizeof(int)*size);
|
419
|
+
mask2 = (int *)malloc(sizeof(int)*size);
|
377
420
|
|
378
421
|
for (i = 0; i < size; i++) {
|
379
|
-
mask[i] = 1;
|
380
422
|
weight[i] = 1;
|
381
423
|
data1[i] = NUM2DBL(rb_ary_entry(vec1, i));
|
382
424
|
data2[i] = NUM2DBL(rb_ary_entry(vec2, i));
|
383
425
|
}
|
384
426
|
|
385
|
-
|
386
|
-
|
427
|
+
copy_mask(m1, mask1, size, 1);
|
428
|
+
copy_mask(m2, mask2, size, 1);
|
429
|
+
|
430
|
+
dist = fn(size, &data1, &data2, &mask1, &mask2, weight, 0, 0, 0);
|
431
|
+
free(mask1);
|
432
|
+
free(mask2);
|
387
433
|
free(weight);
|
388
434
|
free(data2);
|
389
435
|
free(data1);
|
@@ -391,48 +437,155 @@ VALUE rb_distance(VALUE vec1, VALUE vec2, distance_fn fn) {
|
|
391
437
|
return DBL2NUM(dist);
|
392
438
|
}
|
393
439
|
|
394
|
-
|
395
|
-
|
440
|
+
/*
|
441
|
+
Euclidean distance measure
|
442
|
+
|
443
|
+
@example
|
444
|
+
Flock.euclidian_distance([0, 0], [1, 1])
|
445
|
+
Flock.euclidian_distance([0, 0, 0], [1, 1, 1], [1, 1, 0], [1, 1, 0]) # with mask
|
446
|
+
|
447
|
+
@overload euclidian_distance(vector1, vector2, mask1 = identity, mask2 = identity)
|
448
|
+
@param [Array] vector1 Numeric vector
|
449
|
+
@param [Array] vector2 Numeric vector
|
450
|
+
@param [Array] mask1 Optional mask for vector1
|
451
|
+
@param [Array] mask2 Optional mask for vector2
|
452
|
+
*/
|
453
|
+
VALUE rb_euclid(int argc, VALUE *argv, VALUE self) {
|
454
|
+
VALUE v1, v2, m1, m2;
|
455
|
+
rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
|
456
|
+
return rb_distance(v1, m1, v2, m2, euclid);
|
396
457
|
}
|
397
458
|
|
398
|
-
|
399
|
-
|
459
|
+
/*
|
460
|
+
Cityblock distance measure
|
461
|
+
|
462
|
+
@overload cityblock_distance(vector1, vector2, mask1 = identity, mask2 = identity)
|
463
|
+
@param [Array] vector1 Numeric vector
|
464
|
+
@param [Array] vector2 Numeric vector
|
465
|
+
@param [Array] mask1 Optional mask for vector1
|
466
|
+
@param [Array] mask2 Optional mask for vector2
|
467
|
+
*/
|
468
|
+
VALUE rb_cityblock(int argc, VALUE *argv, VALUE self) {
|
469
|
+
VALUE v1, v2, m1, m2;
|
470
|
+
rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
|
471
|
+
return rb_distance(v1, m1, v2, m2, cityblock);
|
400
472
|
}
|
401
473
|
|
402
|
-
|
403
|
-
|
474
|
+
/*
|
475
|
+
Correlation distance measure
|
476
|
+
|
477
|
+
@overload correlation_distance(vector1, vector2, mask1 = identity, mask2 = identity)
|
478
|
+
@param [Array] vector1 Numeric vector
|
479
|
+
@param [Array] vector2 Numeric vector
|
480
|
+
@param [Array] mask1 Optional mask for vector1
|
481
|
+
@param [Array] mask2 Optional mask for vector2
|
482
|
+
*/
|
483
|
+
VALUE rb_correlation(int argc, VALUE *argv, VALUE self) {
|
484
|
+
VALUE v1, v2, m1, m2;
|
485
|
+
rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
|
486
|
+
return rb_distance(v1, m1, v2, m2, correlation);
|
404
487
|
}
|
405
488
|
|
406
|
-
|
407
|
-
|
489
|
+
/*
|
490
|
+
Uncentered correlation distance measure
|
491
|
+
|
492
|
+
@overload uncentered_correlation_distance(vector1, vector2, mask1 = identity, mask2 = identity)
|
493
|
+
@param [Array] vector1 Numeric vector
|
494
|
+
@param [Array] vector2 Numeric vector
|
495
|
+
@param [Array] mask1 Optional mask for vector1
|
496
|
+
@param [Array] mask2 Optional mask for vector2
|
497
|
+
*/
|
498
|
+
VALUE rb_ucorrelation(int argc, VALUE *argv, VALUE self) {
|
499
|
+
VALUE v1, v2, m1, m2;
|
500
|
+
rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
|
501
|
+
return rb_distance(v1, m1, v2, m2, ucorrelation);
|
408
502
|
}
|
409
503
|
|
410
|
-
|
411
|
-
|
504
|
+
/*
|
505
|
+
Absolute correlation distance measure
|
506
|
+
|
507
|
+
@overload absolute_correlation_distance(vector1, vector2, mask1 = identity, mask2 = identity)
|
508
|
+
@param [Array] vector1 Numeric vector
|
509
|
+
@param [Array] vector2 Numeric vector
|
510
|
+
@param [Array] mask1 Optional mask for vector1
|
511
|
+
@param [Array] mask2 Optional mask for vector2
|
512
|
+
*/
|
513
|
+
VALUE rb_acorrelation(int argc, VALUE *argv, VALUE self) {
|
514
|
+
VALUE v1, v2, m1, m2;
|
515
|
+
rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
|
516
|
+
return rb_distance(v1, m1, v2, m2, acorrelation);
|
412
517
|
}
|
413
518
|
|
414
|
-
|
415
|
-
|
519
|
+
/*
|
520
|
+
Absolute uncentered correlation distance measure
|
521
|
+
|
522
|
+
@overload absolute_uncentered_correlation_distance(vector1, vector2, mask1 = identity, mask2 = identity)
|
523
|
+
@param [Array] vector1 Numeric vector
|
524
|
+
@param [Array] vector2 Numeric vector
|
525
|
+
@param [Array] mask1 Optional mask for vector1
|
526
|
+
@param [Array] mask2 Optional mask for vector2
|
527
|
+
*/
|
528
|
+
VALUE rb_uacorrelation(int argc, VALUE *argv, VALUE self) {
|
529
|
+
VALUE v1, v2, m1, m2;
|
530
|
+
rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
|
531
|
+
return rb_distance(v1, m1, v2, m2, uacorrelation);
|
416
532
|
}
|
417
533
|
|
418
|
-
|
419
|
-
|
534
|
+
/*
|
535
|
+
Spearman distance measure
|
536
|
+
|
537
|
+
@overload spearman_distance(vector1, vector2, mask1 = identity, mask2 = identity)
|
538
|
+
@param [Array] vector1 Numeric vector
|
539
|
+
@param [Array] vector2 Numeric vector
|
540
|
+
@param [Array] mask1 Optional mask for vector1
|
541
|
+
@param [Array] mask2 Optional mask for vector2
|
542
|
+
*/
|
543
|
+
VALUE rb_spearman(int argc, VALUE *argv, VALUE self) {
|
544
|
+
VALUE v1, v2, m1, m2;
|
545
|
+
rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
|
546
|
+
return rb_distance(v1, m1, v2, m2, spearman);
|
420
547
|
}
|
421
548
|
|
422
|
-
|
423
|
-
|
549
|
+
/*
|
550
|
+
Kendall distance measure
|
551
|
+
|
552
|
+
@overload kendall_distance(vector1, vector2, mask1 = identity, mask2 = identity)
|
553
|
+
@param [Array] vector1 Numeric vector
|
554
|
+
@param [Array] vector2 Numeric vector
|
555
|
+
@param [Array] mask1 Optional mask for vector1
|
556
|
+
@param [Array] mask2 Optional mask for vector2
|
557
|
+
*/
|
558
|
+
VALUE rb_kendall(int argc, VALUE *argv, VALUE self) {
|
559
|
+
VALUE v1, v2, m1, m2;
|
560
|
+
rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
|
561
|
+
return rb_distance(v1, m1, v2, m2, kendall);
|
424
562
|
}
|
425
563
|
|
426
564
|
|
427
565
|
void Init_flock(void) {
|
428
|
-
mFlock
|
429
|
-
|
430
|
-
|
431
|
-
|
566
|
+
mFlock = rb_define_module("Flock");
|
567
|
+
scFlock = rb_singleton_class(mFlock);
|
568
|
+
|
569
|
+
rb_define_private_method(scFlock, "do_kcluster", RUBY_METHOD_FUNC(rb_do_kcluster), -1);
|
570
|
+
rb_define_private_method(scFlock, "do_self_organizing_map", RUBY_METHOD_FUNC(rb_do_self_organizing_map), -1);
|
571
|
+
rb_define_private_method(scFlock, "do_treecluster", RUBY_METHOD_FUNC(rb_do_treecluster), -1);
|
432
572
|
|
573
|
+
/* kcluster method - K-Means */
|
433
574
|
rb_define_const(mFlock, "METHOD_AVERAGE", INT2NUM('a'));
|
575
|
+
|
576
|
+
/* kcluster method - K-Medians */
|
434
577
|
rb_define_const(mFlock, "METHOD_MEDIAN", INT2NUM('m'));
|
435
578
|
|
579
|
+
/* treecluster method - pairwise single-linkage clustering */
|
580
|
+
rb_define_const(mFlock, "METHOD_SINGLE_LINKAGE", INT2NUM('s'));
|
581
|
+
/* treecluster method - pairwise maximum- (or complete-) linkage clustering */
|
582
|
+
rb_define_const(mFlock, "METHOD_MAXIMUM_LINKAGE", INT2NUM('m'));
|
583
|
+
/* treecluster method - pairwise average-linkage clustering */
|
584
|
+
rb_define_const(mFlock, "METHOD_AVERAGE_LINKAGE", INT2NUM('a'));
|
585
|
+
/* treecluster method - pairwise centroid-linkage clustering */
|
586
|
+
rb_define_const(mFlock, "METHOD_CENTROID_LINKAGE", INT2NUM('c'));
|
587
|
+
|
588
|
+
|
436
589
|
rb_define_const(mFlock, "METRIC_EUCLIDIAN", INT2NUM('e'));
|
437
590
|
rb_define_const(mFlock, "METRIC_CITY_BLOCK", INT2NUM('b'));
|
438
591
|
rb_define_const(mFlock, "METRIC_CORRELATION", INT2NUM('c'));
|
@@ -442,12 +595,27 @@ void Init_flock(void) {
|
|
442
595
|
rb_define_const(mFlock, "METRIC_SPEARMAN", INT2NUM('s'));
|
443
596
|
rb_define_const(mFlock, "METRIC_KENDALL", INT2NUM('k'));
|
444
597
|
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
598
|
+
/* Randomly assign data points to clusters using a uniform distribution. */
|
599
|
+
rb_define_const(mFlock, "SEED_RANDOM", INT2NUM(0));
|
600
|
+
|
601
|
+
/*
|
602
|
+
K-Means++ style initialization where data points are probabilistically assigned to clusters
|
603
|
+
based on their distance from the closest cluster.
|
604
|
+
*/
|
605
|
+
rb_define_const(mFlock, "SEED_KMEANS_PLUSPLUS", INT2NUM(1));
|
606
|
+
|
607
|
+
/*
|
608
|
+
Deterministic cluster assignment by spreading out initial clusters as far away from each other
|
609
|
+
as possible.
|
610
|
+
*/
|
611
|
+
rb_define_const(mFlock, "SEED_SPREADOUT", INT2NUM(2));
|
612
|
+
|
613
|
+
rb_define_module_function(mFlock, "euclidian_distance", RUBY_METHOD_FUNC(rb_euclid), -1);
|
614
|
+
rb_define_module_function(mFlock, "cityblock_distance", RUBY_METHOD_FUNC(rb_cityblock), -1);
|
615
|
+
rb_define_module_function(mFlock, "correlation_distance", RUBY_METHOD_FUNC(rb_correlation), -1);
|
616
|
+
rb_define_module_function(mFlock, "absolute_correlation_distance", RUBY_METHOD_FUNC(rb_acorrelation), -1);
|
617
|
+
rb_define_module_function(mFlock, "uncentered_correlation_distance", RUBY_METHOD_FUNC(rb_ucorrelation), -1);
|
618
|
+
rb_define_module_function(mFlock, "absolute_uncentered_correlation_distance", RUBY_METHOD_FUNC(rb_uacorrelation), -1);
|
619
|
+
rb_define_module_function(mFlock, "spearman_distance", RUBY_METHOD_FUNC(rb_spearman), -1);
|
620
|
+
rb_define_module_function(mFlock, "kendall_distance", RUBY_METHOD_FUNC(rb_kendall), -1);
|
453
621
|
}
|