flock 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/API.rdoc +22 -0
- data/README.rdoc +44 -14
- data/Rakefile +5 -0
- data/VERSION +1 -1
- data/ext/cluster.c +2993 -2680
- data/ext/cluster.h +1 -1
- data/ext/flock.c +235 -67
- data/ext/kmeanspp.c +129 -0
- data/flock.gemspec +13 -18
- data/lib/flock.rb +229 -32
- metadata +10 -15
- data/examples/dense.rb +0 -38
- data/examples/som.rb +0 -13
- data/examples/sparse.rb +0 -22
- data/examples/treecluster.rb +0 -13
data/ext/cluster.h
CHANGED
@@ -55,7 +55,7 @@ void getclustermedoids(int nclusters, int nelements, double** distance,
|
|
55
55
|
int clusterid[], int centroids[], double errors[]);
|
56
56
|
void kcluster (int nclusters, int ngenes, int ndata, double** data,
|
57
57
|
int** mask, double weight[], int transpose, int npass, char method, char dist,
|
58
|
-
int clusterid[], double* error, int* ifound);
|
58
|
+
int clusterid[], double* error, int* ifound, int assign);
|
59
59
|
void kmedoids (int nclusters, int nelements, double** distance,
|
60
60
|
int npass, int clusterid[], double* error, int* ifound);
|
61
61
|
|
data/ext/flock.c
CHANGED
@@ -3,41 +3,60 @@
|
|
3
3
|
|
4
4
|
#define ID_CONST_GET rb_intern("const_get")
|
5
5
|
#define CONST_GET(scope, constant) (rb_funcall(scope, ID_CONST_GET, 1, rb_str_new2(constant)))
|
6
|
+
#define DEFAULT_ITERATIONS 100
|
6
7
|
|
7
|
-
static VALUE mFlock;
|
8
|
+
static VALUE mFlock, scFlock;
|
8
9
|
typedef double (*distance_fn)(int, double**, double**, int**, int**, const double [], int, int, int);
|
9
10
|
|
10
|
-
int
|
11
|
-
|
11
|
+
int get_int_option(VALUE option, char *key, int default_value) {
|
12
|
+
if (NIL_P(option)) return default_value;
|
12
13
|
|
13
|
-
|
14
|
-
|
14
|
+
VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
|
15
|
+
return NIL_P(value) ? default_value : NUM2INT(value);
|
15
16
|
}
|
16
17
|
|
17
|
-
int
|
18
|
-
|
18
|
+
int get_bool_option(VALUE option, char *key, int default_value) {
|
19
|
+
if (NIL_P(option)) return default_value;
|
20
|
+
VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
|
21
|
+
return (TYPE(value) == T_FALSE || TYPE(value) == T_NIL) ? 0 : 1;
|
22
|
+
}
|
23
|
+
|
24
|
+
double get_dbl_option(VALUE option, char *key, double default_value) {
|
25
|
+
if (NIL_P(option)) return default_value;
|
19
26
|
|
20
|
-
|
21
|
-
|
27
|
+
VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
|
28
|
+
return NIL_P(value) ? default_value : NUM2DBL(value);
|
22
29
|
}
|
23
30
|
|
24
|
-
VALUE
|
31
|
+
VALUE get_value_option(VALUE option, char *key, VALUE default_value) {
|
32
|
+
if (NIL_P(option)) return default_value;
|
33
|
+
|
34
|
+
VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
|
35
|
+
return NIL_P(value) ? default_value : value;
|
36
|
+
}
|
37
|
+
|
38
|
+
/* @api private */
|
39
|
+
VALUE rb_do_kcluster(int argc, VALUE *argv, VALUE self) {
|
25
40
|
VALUE size, data, mask, weights, options;
|
26
|
-
rb_scan_args(argc, argv, "
|
41
|
+
rb_scan_args(argc, argv, "21", &size, &data, &options);
|
27
42
|
|
28
43
|
if (TYPE(data) != T_ARRAY)
|
29
44
|
rb_raise(rb_eArgError, "data should be an array of arrays");
|
30
45
|
|
46
|
+
if (NIL_P(size) || NUM2INT(rb_Integer(size)) > RARRAY_LEN(data))
|
47
|
+
rb_raise(rb_eArgError, "size should be > 0 and <= data size");
|
48
|
+
|
49
|
+
mask = get_value_option(options, "mask", Qnil);
|
50
|
+
|
31
51
|
if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
|
32
52
|
rb_raise(rb_eArgError, "mask should be an array of arrays");
|
33
53
|
|
34
|
-
|
35
|
-
|
54
|
+
int transpose = get_bool_option(options, "transpose", 0);
|
55
|
+
int npass = get_int_option(options, "iterations", DEFAULT_ITERATIONS);
|
36
56
|
|
37
|
-
int transpose = opt_int_value(options, "transpose", 0);
|
38
|
-
int npass = opt_int_value(options, "iterations", 1000);
|
39
57
|
// a = average, m = means
|
40
|
-
int method =
|
58
|
+
int method = get_int_option(options, "method", 'a');
|
59
|
+
|
41
60
|
// e = Euclidean,
|
42
61
|
// b = city-block distance
|
43
62
|
// c = correlation
|
@@ -46,7 +65,10 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
|
|
46
65
|
// x = absolute uncentered correlation
|
47
66
|
// s = spearman's rank correlation
|
48
67
|
// k = kendall's tau
|
49
|
-
int dist =
|
68
|
+
int dist = get_int_option(options, "metric", 'e');
|
69
|
+
|
70
|
+
// initial assignment
|
71
|
+
int assign = get_int_option(options, "seed", 0);
|
50
72
|
|
51
73
|
int i,j;
|
52
74
|
int nrows = RARRAY_LEN(data);
|
@@ -94,7 +116,7 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
|
|
94
116
|
double error;
|
95
117
|
|
96
118
|
kcluster(nsets,
|
97
|
-
nrows, ncols, cdata, cmask, cweights, transpose, npass, method, dist, ccluster, &error, &ifound);
|
119
|
+
nrows, ncols, cdata, cmask, cweights, transpose, npass, method, dist, ccluster, &error, &ifound, assign);
|
98
120
|
getclustercentroids(nsets,
|
99
121
|
nrows, ncols, cdata, cmask, ccluster, ccentroid, ccentroid_mask, transpose, method);
|
100
122
|
|
@@ -137,13 +159,16 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
|
|
137
159
|
return result;
|
138
160
|
}
|
139
161
|
|
140
|
-
|
162
|
+
/* @api private */
|
163
|
+
VALUE rb_do_self_organizing_map(int argc, VALUE *argv, VALUE self) {
|
141
164
|
VALUE nx, ny, data, mask, weights, options;
|
142
|
-
rb_scan_args(argc, argv, "
|
165
|
+
rb_scan_args(argc, argv, "31", &nx, &ny, &data, &options);
|
143
166
|
|
144
167
|
if (TYPE(data) != T_ARRAY)
|
145
168
|
rb_raise(rb_eArgError, "data should be an array of arrays");
|
146
169
|
|
170
|
+
mask = get_value_option(options, "mask", Qnil);
|
171
|
+
|
147
172
|
if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
|
148
173
|
rb_raise(rb_eArgError, "mask should be an array of arrays");
|
149
174
|
|
@@ -155,8 +180,8 @@ VALUE rb_som(int argc, VALUE *argv, VALUE self) {
|
|
155
180
|
|
156
181
|
int nxgrid = NUM2INT(rb_Integer(nx));
|
157
182
|
int nygrid = NUM2INT(rb_Integer(ny));
|
158
|
-
int transpose =
|
159
|
-
int npass =
|
183
|
+
int transpose = get_int_option(options, "transpose", 0);
|
184
|
+
int npass = get_int_option(options, "iterations", DEFAULT_ITERATIONS);
|
160
185
|
|
161
186
|
// e = Euclidean,
|
162
187
|
// b = city-block distance
|
@@ -166,8 +191,8 @@ VALUE rb_som(int argc, VALUE *argv, VALUE self) {
|
|
166
191
|
// x = absolute uncentered correlation
|
167
192
|
// s = spearman's rank correlation
|
168
193
|
// k = kendall's tau
|
169
|
-
int dist =
|
170
|
-
double tau =
|
194
|
+
int dist = get_int_option(options, "metric", 'e');
|
195
|
+
double tau = get_dbl_option(options, "tau", 1.0);
|
171
196
|
|
172
197
|
int i, j, k;
|
173
198
|
int nrows = RARRAY_LEN(data);
|
@@ -211,8 +236,7 @@ VALUE rb_som(int argc, VALUE *argv, VALUE self) {
|
|
211
236
|
ccelldata[i][j] = (double *)malloc(sizeof(double)*dimy);
|
212
237
|
}
|
213
238
|
|
214
|
-
somcluster(nrows, ncols, cdata, cmask, cweights, transpose,
|
215
|
-
nxgrid, nygrid, tau, npass, dist, ccelldata, ccluster);
|
239
|
+
somcluster(nrows, ncols, cdata, cmask, cweights, transpose, nxgrid, nygrid, tau, npass, dist, ccelldata, ccluster);
|
216
240
|
|
217
241
|
VALUE result = rb_hash_new();
|
218
242
|
VALUE cluster = rb_ary_new();
|
@@ -260,22 +284,30 @@ VALUE rb_som(int argc, VALUE *argv, VALUE self) {
|
|
260
284
|
return result;
|
261
285
|
}
|
262
286
|
|
263
|
-
|
287
|
+
/* @api private */
|
288
|
+
VALUE rb_do_treecluster(int argc, VALUE *argv, VALUE self) {
|
264
289
|
VALUE size, data, mask, weights, options;
|
265
|
-
rb_scan_args(argc, argv, "
|
290
|
+
rb_scan_args(argc, argv, "21", &size, &data, &options);
|
266
291
|
|
267
292
|
if (TYPE(data) != T_ARRAY)
|
268
293
|
rb_raise(rb_eArgError, "data should be an array of arrays");
|
269
294
|
|
295
|
+
mask = get_value_option(options, "mask", Qnil);
|
296
|
+
|
270
297
|
if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
|
271
298
|
rb_raise(rb_eArgError, "mask should be an array of arrays");
|
272
299
|
|
273
300
|
if (NIL_P(size) || NUM2INT(rb_Integer(size)) > RARRAY_LEN(data))
|
274
301
|
rb_raise(rb_eArgError, "size should be > 0 and <= data size");
|
275
302
|
|
276
|
-
int transpose =
|
277
|
-
|
278
|
-
|
303
|
+
int transpose = get_int_option(options, "transpose", 0);
|
304
|
+
|
305
|
+
// s: pairwise single-linkage clustering
|
306
|
+
// m: pairwise maximum- (or complete-) linkage clustering
|
307
|
+
// a: pairwise average-linkage clustering
|
308
|
+
// c: pairwise centroid-linkage clustering
|
309
|
+
int method = get_int_option(options, "method", 'a');
|
310
|
+
|
279
311
|
// e = Euclidean,
|
280
312
|
// b = city-block distance
|
281
313
|
// c = correlation
|
@@ -284,7 +316,7 @@ VALUE rb_treecluster(int argc, VALUE *argv, VALUE self) {
|
|
284
316
|
// x = absolute uncentered correlation
|
285
317
|
// s = spearman's rank correlation
|
286
318
|
// k = kendall's tau
|
287
|
-
int dist =
|
319
|
+
int dist = get_int_option(options, "metric", 'e');
|
288
320
|
|
289
321
|
int i,j;
|
290
322
|
int nrows = RARRAY_LEN(data);
|
@@ -346,15 +378,25 @@ VALUE rb_treecluster(int argc, VALUE *argv, VALUE self) {
|
|
346
378
|
if (tree)
|
347
379
|
free(tree);
|
348
380
|
else
|
349
|
-
rb_raise(rb_eNoMemError, "
|
381
|
+
rb_raise(rb_eNoMemError, "treecluster ran out of memory");
|
350
382
|
|
351
383
|
return result;
|
352
384
|
}
|
353
385
|
|
354
|
-
|
386
|
+
void inline copy_mask(VALUE src, int *dst, int size, int def) {
|
387
|
+
int i;
|
388
|
+
if (NIL_P(src))
|
389
|
+
for (i = 0; i < size; i++)
|
390
|
+
dst[i] = def;
|
391
|
+
else
|
392
|
+
for (i = 0; i < size; i++)
|
393
|
+
dst[i] = NUM2INT(rb_ary_entry(src, i));
|
394
|
+
}
|
395
|
+
|
396
|
+
VALUE rb_distance(VALUE vec1, VALUE m1, VALUE vec2, VALUE m2, distance_fn fn) {
|
355
397
|
uint32_t size;
|
356
398
|
double *data1, *data2, *weight, dist;
|
357
|
-
int *
|
399
|
+
int *mask1, *mask2, i;
|
358
400
|
|
359
401
|
if (TYPE(vec1) != T_ARRAY)
|
360
402
|
rb_raise(rb_eArgError, "vector1 should be an array");
|
@@ -373,17 +415,21 @@ VALUE rb_distance(VALUE vec1, VALUE vec2, distance_fn fn) {
|
|
373
415
|
data1 = (double *)malloc(sizeof(double)*size);
|
374
416
|
data2 = (double *)malloc(sizeof(double)*size);
|
375
417
|
weight = (double *)malloc(sizeof(double)*size);
|
376
|
-
|
418
|
+
mask1 = (int *)malloc(sizeof(int)*size);
|
419
|
+
mask2 = (int *)malloc(sizeof(int)*size);
|
377
420
|
|
378
421
|
for (i = 0; i < size; i++) {
|
379
|
-
mask[i] = 1;
|
380
422
|
weight[i] = 1;
|
381
423
|
data1[i] = NUM2DBL(rb_ary_entry(vec1, i));
|
382
424
|
data2[i] = NUM2DBL(rb_ary_entry(vec2, i));
|
383
425
|
}
|
384
426
|
|
385
|
-
|
386
|
-
|
427
|
+
copy_mask(m1, mask1, size, 1);
|
428
|
+
copy_mask(m2, mask2, size, 1);
|
429
|
+
|
430
|
+
dist = fn(size, &data1, &data2, &mask1, &mask2, weight, 0, 0, 0);
|
431
|
+
free(mask1);
|
432
|
+
free(mask2);
|
387
433
|
free(weight);
|
388
434
|
free(data2);
|
389
435
|
free(data1);
|
@@ -391,48 +437,155 @@ VALUE rb_distance(VALUE vec1, VALUE vec2, distance_fn fn) {
|
|
391
437
|
return DBL2NUM(dist);
|
392
438
|
}
|
393
439
|
|
394
|
-
|
395
|
-
|
440
|
+
/*
|
441
|
+
Euclidean distance measure
|
442
|
+
|
443
|
+
@example
|
444
|
+
Flock.euclidian_distance([0, 0], [1, 1])
|
445
|
+
Flock.euclidian_distance([0, 0, 0], [1, 1, 1], [1, 1, 0], [1, 1, 0]) # with mask
|
446
|
+
|
447
|
+
@overload euclidian_distance(vector1, vector2, mask1 = identity, mask2 = identity)
|
448
|
+
@param [Array] vector1 Numeric vector
|
449
|
+
@param [Array] vector2 Numeric vector
|
450
|
+
@param [Array] mask1 Optional mask for vector1
|
451
|
+
@param [Array] mask2 Optional mask for vector2
|
452
|
+
*/
|
453
|
+
VALUE rb_euclid(int argc, VALUE *argv, VALUE self) {
|
454
|
+
VALUE v1, v2, m1, m2;
|
455
|
+
rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
|
456
|
+
return rb_distance(v1, m1, v2, m2, euclid);
|
396
457
|
}
|
397
458
|
|
398
|
-
|
399
|
-
|
459
|
+
/*
|
460
|
+
City-block distance measure
|
461
|
+
|
462
|
+
@overload cityblock_distance(vector1, vector2, mask1 = identity, mask2 = identity)
|
463
|
+
@param [Array] vector1 Numeric vector
|
464
|
+
@param [Array] vector2 Numeric vector
|
465
|
+
@param [Array] mask1 Optional mask for vector1
|
466
|
+
@param [Array] mask2 Optional mask for vector2
|
467
|
+
*/
|
468
|
+
VALUE rb_cityblock(int argc, VALUE *argv, VALUE self) {
|
469
|
+
VALUE v1, v2, m1, m2;
|
470
|
+
rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
|
471
|
+
return rb_distance(v1, m1, v2, m2, cityblock);
|
400
472
|
}
|
401
473
|
|
402
|
-
|
403
|
-
|
474
|
+
/*
|
475
|
+
Correlation distance measure
|
476
|
+
|
477
|
+
@overload correlation_distance(vector1, vector2, mask1 = identity, mask2 = identity)
|
478
|
+
@param [Array] vector1 Numeric vector
|
479
|
+
@param [Array] vector2 Numeric vector
|
480
|
+
@param [Array] mask1 Optional mask for vector1
|
481
|
+
@param [Array] mask2 Optional mask for vector2
|
482
|
+
*/
|
483
|
+
VALUE rb_correlation(int argc, VALUE *argv, VALUE self) {
|
484
|
+
VALUE v1, v2, m1, m2;
|
485
|
+
rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
|
486
|
+
return rb_distance(v1, m1, v2, m2, correlation);
|
404
487
|
}
|
405
488
|
|
406
|
-
|
407
|
-
|
489
|
+
/*
|
490
|
+
Uncentered correlation distance measure
|
491
|
+
|
492
|
+
@overload uncentered_correlation_distance(vector1, vector2, mask1 = identity, mask2 = identity)
|
493
|
+
@param [Array] vector1 Numeric vector
|
494
|
+
@param [Array] vector2 Numeric vector
|
495
|
+
@param [Array] mask1 Optional mask for vector1
|
496
|
+
@param [Array] mask2 Optional mask for vector2
|
497
|
+
*/
|
498
|
+
VALUE rb_ucorrelation(int argc, VALUE *argv, VALUE self) {
|
499
|
+
VALUE v1, v2, m1, m2;
|
500
|
+
rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
|
501
|
+
return rb_distance(v1, m1, v2, m2, ucorrelation);
|
408
502
|
}
|
409
503
|
|
410
|
-
|
411
|
-
|
504
|
+
/*
|
505
|
+
Absolute correlation distance measure
|
506
|
+
|
507
|
+
@overload absolute_correlation_distance(vector1, vector2, mask1 = identity, mask2 = identity)
|
508
|
+
@param [Array] vector1 Numeric vector
|
509
|
+
@param [Array] vector2 Numeric vector
|
510
|
+
@param [Array] mask1 Optional mask for vector1
|
511
|
+
@param [Array] mask2 Optional mask for vector2
|
512
|
+
*/
|
513
|
+
VALUE rb_acorrelation(int argc, VALUE *argv, VALUE self) {
|
514
|
+
VALUE v1, v2, m1, m2;
|
515
|
+
rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
|
516
|
+
return rb_distance(v1, m1, v2, m2, acorrelation);
|
412
517
|
}
|
413
518
|
|
414
|
-
|
415
|
-
|
519
|
+
/*
|
520
|
+
Absolute uncentered correlation distance measure
|
521
|
+
|
522
|
+
@overload absolute_uncentered_correlation_distance(vector1, vector2, mask1 = identity, mask2 = identity)
|
523
|
+
@param [Array] vector1 Numeric vector
|
524
|
+
@param [Array] vector2 Numeric vector
|
525
|
+
@param [Array] mask1 Optional mask for vector1
|
526
|
+
@param [Array] mask2 Optional mask for vector2
|
527
|
+
*/
|
528
|
+
VALUE rb_uacorrelation(int argc, VALUE *argv, VALUE self) {
|
529
|
+
VALUE v1, v2, m1, m2;
|
530
|
+
rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
|
531
|
+
return rb_distance(v1, m1, v2, m2, uacorrelation);
|
416
532
|
}
|
417
533
|
|
418
|
-
|
419
|
-
|
534
|
+
/*
|
535
|
+
Spearman distance measure
|
536
|
+
|
537
|
+
@overload spearman_distance(vector1, vector2, mask1 = identity, mask2 = identity)
|
538
|
+
@param [Array] vector1 Numeric vector
|
539
|
+
@param [Array] vector2 Numeric vector
|
540
|
+
@param [Array] mask1 Optional mask for vector1
|
541
|
+
@param [Array] mask2 Optional mask for vector2
|
542
|
+
*/
|
543
|
+
VALUE rb_spearman(int argc, VALUE *argv, VALUE self) {
|
544
|
+
VALUE v1, v2, m1, m2;
|
545
|
+
rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
|
546
|
+
return rb_distance(v1, m1, v2, m2, spearman);
|
420
547
|
}
|
421
548
|
|
422
|
-
|
423
|
-
|
549
|
+
/*
|
550
|
+
Kendall distance measure
|
551
|
+
|
552
|
+
@overload kendall_distance(vector1, vector2, mask1 = identity, mask2 = identity)
|
553
|
+
@param [Array] vector1 Numeric vector
|
554
|
+
@param [Array] vector2 Numeric vector
|
555
|
+
@param [Array] mask1 Optional mask for vector1
|
556
|
+
@param [Array] mask2 Optional mask for vector2
|
557
|
+
*/
|
558
|
+
VALUE rb_kendall(int argc, VALUE *argv, VALUE self) {
|
559
|
+
VALUE v1, v2, m1, m2;
|
560
|
+
rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
|
561
|
+
return rb_distance(v1, m1, v2, m2, kendall);
|
424
562
|
}
|
425
563
|
|
426
564
|
|
427
565
|
void Init_flock(void) {
|
428
|
-
mFlock
|
429
|
-
|
430
|
-
|
431
|
-
|
566
|
+
mFlock = rb_define_module("Flock");
|
567
|
+
scFlock = rb_singleton_class(mFlock);
|
568
|
+
|
569
|
+
rb_define_private_method(scFlock, "do_kcluster", RUBY_METHOD_FUNC(rb_do_kcluster), -1);
|
570
|
+
rb_define_private_method(scFlock, "do_self_organizing_map", RUBY_METHOD_FUNC(rb_do_self_organizing_map), -1);
|
571
|
+
rb_define_private_method(scFlock, "do_treecluster", RUBY_METHOD_FUNC(rb_do_treecluster), -1);
|
432
572
|
|
573
|
+
/* kcluster method - K-Means */
|
433
574
|
rb_define_const(mFlock, "METHOD_AVERAGE", INT2NUM('a'));
|
575
|
+
|
576
|
+
/* kcluster method - K-Medians */
|
434
577
|
rb_define_const(mFlock, "METHOD_MEDIAN", INT2NUM('m'));
|
435
578
|
|
579
|
+
/* treecluster method - pairwise single-linkage clustering */
|
580
|
+
rb_define_const(mFlock, "METHOD_SINGLE_LINKAGE", INT2NUM('s'));
|
581
|
+
/* treecluster method - pairwise maximum- (or complete-) linkage clustering */
|
582
|
+
rb_define_const(mFlock, "METHOD_MAXIMUM_LINKAGE", INT2NUM('m'));
|
583
|
+
/* treecluster method - pairwise average-linkage clustering */
|
584
|
+
rb_define_const(mFlock, "METHOD_AVERAGE_LINKAGE", INT2NUM('a'));
|
585
|
+
/* treecluster method - pairwise centroid-linkage clustering */
|
586
|
+
rb_define_const(mFlock, "METHOD_CENTROID_LINKAGE", INT2NUM('c'));
|
587
|
+
|
588
|
+
|
436
589
|
rb_define_const(mFlock, "METRIC_EUCLIDIAN", INT2NUM('e'));
|
437
590
|
rb_define_const(mFlock, "METRIC_CITY_BLOCK", INT2NUM('b'));
|
438
591
|
rb_define_const(mFlock, "METRIC_CORRELATION", INT2NUM('c'));
|
@@ -442,12 +595,27 @@ void Init_flock(void) {
|
|
442
595
|
rb_define_const(mFlock, "METRIC_SPEARMAN", INT2NUM('s'));
|
443
596
|
rb_define_const(mFlock, "METRIC_KENDALL", INT2NUM('k'));
|
444
597
|
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
598
|
+
/* Randomly assign data points to clusters using a uniform distribution. */
|
599
|
+
rb_define_const(mFlock, "SEED_RANDOM", INT2NUM(0));
|
600
|
+
|
601
|
+
/*
|
602
|
+
K-Means++ style initialization where data points are probabilistically assigned to clusters
|
603
|
+
based on their distance from the closest cluster.
|
604
|
+
*/
|
605
|
+
rb_define_const(mFlock, "SEED_KMEANS_PLUSPLUS", INT2NUM(1));
|
606
|
+
|
607
|
+
/*
|
608
|
+
Deterministic cluster assignment by spreading out initial clusters as far away from each other
|
609
|
+
as possible.
|
610
|
+
*/
|
611
|
+
rb_define_const(mFlock, "SEED_SPREADOUT", INT2NUM(2));
|
612
|
+
|
613
|
+
rb_define_module_function(mFlock, "euclidian_distance", RUBY_METHOD_FUNC(rb_euclid), -1);
|
614
|
+
rb_define_module_function(mFlock, "cityblock_distance", RUBY_METHOD_FUNC(rb_cityblock), -1);
|
615
|
+
rb_define_module_function(mFlock, "correlation_distance", RUBY_METHOD_FUNC(rb_correlation), -1);
|
616
|
+
rb_define_module_function(mFlock, "absolute_correlation_distance", RUBY_METHOD_FUNC(rb_acorrelation), -1);
|
617
|
+
rb_define_module_function(mFlock, "uncentered_correlation_distance", RUBY_METHOD_FUNC(rb_ucorrelation), -1);
|
618
|
+
rb_define_module_function(mFlock, "absolute_uncentered_correlation_distance", RUBY_METHOD_FUNC(rb_uacorrelation), -1);
|
619
|
+
rb_define_module_function(mFlock, "spearman_distance", RUBY_METHOD_FUNC(rb_spearman), -1);
|
620
|
+
rb_define_module_function(mFlock, "kendall_distance", RUBY_METHOD_FUNC(rb_kendall), -1);
|
453
621
|
}
|