flock 0.4.1 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -55,7 +55,7 @@ void getclustermedoids(int nclusters, int nelements, double** distance,
55
55
  int clusterid[], int centroids[], double errors[]);
56
56
  void kcluster (int nclusters, int ngenes, int ndata, double** data,
57
57
  int** mask, double weight[], int transpose, int npass, char method, char dist,
58
- int clusterid[], double* error, int* ifound);
58
+ int clusterid[], double* error, int* ifound, int assign);
59
59
  void kmedoids (int nclusters, int nelements, double** distance,
60
60
  int npass, int clusterid[], double* error, int* ifound);
61
61
 
@@ -3,41 +3,60 @@
3
3
 
4
4
  #define ID_CONST_GET rb_intern("const_get")
5
5
  #define CONST_GET(scope, constant) (rb_funcall(scope, ID_CONST_GET, 1, rb_str_new2(constant)))
6
+ #define DEFAULT_ITERATIONS 100
6
7
 
7
- static VALUE mFlock;
8
+ static VALUE mFlock, scFlock;
8
9
  typedef double (*distance_fn)(int, double**, double**, int**, int**, const double [], int, int, int);
9
10
 
10
- int opt_int_value(VALUE option, char *key, int def) {
11
- if (NIL_P(option)) return def;
11
+ int get_int_option(VALUE option, char *key, int default_value) {
12
+ if (NIL_P(option)) return default_value;
12
13
 
13
- VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
14
- return NIL_P(value) ? def : NUM2INT(value);
14
+ VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
15
+ return NIL_P(value) ? default_value : NUM2INT(value);
15
16
  }
16
17
 
17
- int opt_double_value(VALUE option, char *key, double def) {
18
- if (NIL_P(option)) return def;
18
+ int get_bool_option(VALUE option, char *key, int default_value) {
19
+ if (NIL_P(option)) return default_value;
20
+ VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
21
+ return (TYPE(value) == T_FALSE || TYPE(value) == T_NIL) ? 0 : 1;
22
+ }
23
+
24
+ double get_dbl_option(VALUE option, char *key, double default_value) {
25
+ if (NIL_P(option)) return default_value;
19
26
 
20
- VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
21
- return NIL_P(value) ? def : NUM2DBL(value);
27
+ VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
28
+ return NIL_P(value) ? default_value : NUM2DBL(value);
22
29
  }
23
30
 
24
- VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
31
+ VALUE get_value_option(VALUE option, char *key, VALUE default_value) {
32
+ if (NIL_P(option)) return default_value;
33
+
34
+ VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
35
+ return NIL_P(value) ? default_value : value;
36
+ }
37
+
38
+ /* @api private */
39
+ VALUE rb_do_kcluster(int argc, VALUE *argv, VALUE self) {
25
40
  VALUE size, data, mask, weights, options;
26
- rb_scan_args(argc, argv, "22", &size, &data, &mask, &options);
41
+ rb_scan_args(argc, argv, "21", &size, &data, &options);
27
42
 
28
43
  if (TYPE(data) != T_ARRAY)
29
44
  rb_raise(rb_eArgError, "data should be an array of arrays");
30
45
 
46
+ if (NIL_P(size) || NUM2INT(rb_Integer(size)) > RARRAY_LEN(data))
47
+ rb_raise(rb_eArgError, "size should be > 0 and <= data size");
48
+
49
+ mask = get_value_option(options, "mask", Qnil);
50
+
31
51
  if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
32
52
  rb_raise(rb_eArgError, "mask should be an array of arrays");
33
53
 
34
- if (NIL_P(size) || NUM2INT(rb_Integer(size)) > RARRAY_LEN(data))
35
- rb_raise(rb_eArgError, "size should be > 0 and <= data size");
54
+ int transpose = get_bool_option(options, "transpose", 0);
55
+ int npass = get_int_option(options, "iterations", DEFAULT_ITERATIONS);
36
56
 
37
- int transpose = opt_int_value(options, "transpose", 0);
38
- int npass = opt_int_value(options, "iterations", 1000);
39
57
  // a = average, m = means
40
- int method = opt_int_value(options, "method", 'a');
58
+ int method = get_int_option(options, "method", 'a');
59
+
41
60
  // e = euclidian,
42
61
  // b = city-block distance
43
62
  // c = correlation
@@ -46,7 +65,10 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
46
65
  // x = absolute uncentered correlation
47
66
  // s = spearman's rank correlation
48
67
  // k = kendall's tau
49
- int dist = opt_int_value(options, "metric", 'e');
68
+ int dist = get_int_option(options, "metric", 'e');
69
+
70
+ // initial assignment
71
+ int assign = get_int_option(options, "seed", 0);
50
72
 
51
73
  int i,j;
52
74
  int nrows = RARRAY_LEN(data);
@@ -94,7 +116,7 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
94
116
  double error;
95
117
 
96
118
  kcluster(nsets,
97
- nrows, ncols, cdata, cmask, cweights, transpose, npass, method, dist, ccluster, &error, &ifound);
119
+ nrows, ncols, cdata, cmask, cweights, transpose, npass, method, dist, ccluster, &error, &ifound, assign);
98
120
  getclustercentroids(nsets,
99
121
  nrows, ncols, cdata, cmask, ccluster, ccentroid, ccentroid_mask, transpose, method);
100
122
 
@@ -137,13 +159,16 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
137
159
  return result;
138
160
  }
139
161
 
140
- VALUE rb_som(int argc, VALUE *argv, VALUE self) {
162
+ /* @api private */
163
+ VALUE rb_do_self_organizing_map(int argc, VALUE *argv, VALUE self) {
141
164
  VALUE nx, ny, data, mask, weights, options;
142
- rb_scan_args(argc, argv, "32", &nx, &ny, &data, &mask, &options);
165
+ rb_scan_args(argc, argv, "31", &nx, &ny, &data, &options);
143
166
 
144
167
  if (TYPE(data) != T_ARRAY)
145
168
  rb_raise(rb_eArgError, "data should be an array of arrays");
146
169
 
170
+ mask = get_value_option(options, "mask", Qnil);
171
+
147
172
  if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
148
173
  rb_raise(rb_eArgError, "mask should be an array of arrays");
149
174
 
@@ -155,8 +180,8 @@ VALUE rb_som(int argc, VALUE *argv, VALUE self) {
155
180
 
156
181
  int nxgrid = NUM2INT(rb_Integer(nx));
157
182
  int nygrid = NUM2INT(rb_Integer(ny));
158
- int transpose = opt_int_value(options, "transpose", 0);
159
- int npass = opt_int_value(options, "iterations", 1000);
183
+ int transpose = get_int_option(options, "transpose", 0);
184
+ int npass = get_int_option(options, "iterations", DEFAULT_ITERATIONS);
160
185
 
161
186
  // e = euclidian,
162
187
  // b = city-block distance
@@ -166,8 +191,8 @@ VALUE rb_som(int argc, VALUE *argv, VALUE self) {
166
191
  // x = absolute uncentered correlation
167
192
  // s = spearman's rank correlation
168
193
  // k = kendall's tau
169
- int dist = opt_int_value(options, "metric", 'e');
170
- double tau = opt_double_value(options, "tau", 1.0);
194
+ int dist = get_int_option(options, "metric", 'e');
195
+ double tau = get_dbl_option(options, "tau", 1.0);
171
196
 
172
197
  int i, j, k;
173
198
  int nrows = RARRAY_LEN(data);
@@ -211,8 +236,7 @@ VALUE rb_som(int argc, VALUE *argv, VALUE self) {
211
236
  ccelldata[i][j] = (double *)malloc(sizeof(double)*dimy);
212
237
  }
213
238
 
214
- somcluster(nrows, ncols, cdata, cmask, cweights, transpose,
215
- nxgrid, nygrid, tau, npass, dist, ccelldata, ccluster);
239
+ somcluster(nrows, ncols, cdata, cmask, cweights, transpose, nxgrid, nygrid, tau, npass, dist, ccelldata, ccluster);
216
240
 
217
241
  VALUE result = rb_hash_new();
218
242
  VALUE cluster = rb_ary_new();
@@ -260,22 +284,30 @@ VALUE rb_som(int argc, VALUE *argv, VALUE self) {
260
284
  return result;
261
285
  }
262
286
 
263
- VALUE rb_treecluster(int argc, VALUE *argv, VALUE self) {
287
+ /* @api private */
288
+ VALUE rb_do_treecluster(int argc, VALUE *argv, VALUE self) {
264
289
  VALUE size, data, mask, weights, options;
265
- rb_scan_args(argc, argv, "22", &size, &data, &mask, &options);
290
+ rb_scan_args(argc, argv, "21", &size, &data, &options);
266
291
 
267
292
  if (TYPE(data) != T_ARRAY)
268
293
  rb_raise(rb_eArgError, "data should be an array of arrays");
269
294
 
295
+ mask = get_value_option(options, "mask", Qnil);
296
+
270
297
  if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
271
298
  rb_raise(rb_eArgError, "mask should be an array of arrays");
272
299
 
273
300
  if (NIL_P(size) || NUM2INT(rb_Integer(size)) > RARRAY_LEN(data))
274
301
  rb_raise(rb_eArgError, "size should be > 0 and <= data size");
275
302
 
276
- int transpose = opt_int_value(options, "transpose", 0);
277
- // a = average, m = means
278
- int method = opt_int_value(options, "method", 'a');
303
+ int transpose = get_int_option(options, "transpose", 0);
304
+
305
+ // s: pairwise single-linkage clustering
306
+ // m: pairwise maximum- (or complete-) linkage clustering
307
+ // a: pairwise average-linkage clustering
308
+ // c: pairwise centroid-linkage clustering
309
+ int method = get_int_option(options, "method", 'a');
310
+
279
311
  // e = euclidian,
280
312
  // b = city-block distance
281
313
  // c = correlation
@@ -284,7 +316,7 @@ VALUE rb_treecluster(int argc, VALUE *argv, VALUE self) {
284
316
  // x = absolute uncentered correlation
285
317
  // s = spearman's rank correlation
286
318
  // k = kendall's tau
287
- int dist = opt_int_value(options, "metric", 'e');
319
+ int dist = get_int_option(options, "metric", 'e');
288
320
 
289
321
  int i,j;
290
322
  int nrows = RARRAY_LEN(data);
@@ -346,15 +378,25 @@ VALUE rb_treecluster(int argc, VALUE *argv, VALUE self) {
346
378
  if (tree)
347
379
  free(tree);
348
380
  else
349
- rb_raise(rb_eNoMemError, "tree cluster ran out of memory");
381
+ rb_raise(rb_eNoMemError, "treecluster ran out of memory");
350
382
 
351
383
  return result;
352
384
  }
353
385
 
354
- VALUE rb_distance(VALUE vec1, VALUE vec2, distance_fn fn) {
386
+ void inline copy_mask(VALUE src, int *dst, int size, int def) {
387
+ int i;
388
+ if (NIL_P(src))
389
+ for (i = 0; i < size; i++)
390
+ dst[i] = def;
391
+ else
392
+ for (i = 0; i < size; i++)
393
+ dst[i] = NUM2INT(rb_ary_entry(src, i));
394
+ }
395
+
396
+ VALUE rb_distance(VALUE vec1, VALUE m1, VALUE vec2, VALUE m2, distance_fn fn) {
355
397
  uint32_t size;
356
398
  double *data1, *data2, *weight, dist;
357
- int *mask, i;
399
+ int *mask1, *mask2, i;
358
400
 
359
401
  if (TYPE(vec1) != T_ARRAY)
360
402
  rb_raise(rb_eArgError, "vector1 should be an array");
@@ -373,17 +415,21 @@ VALUE rb_distance(VALUE vec1, VALUE vec2, distance_fn fn) {
373
415
  data1 = (double *)malloc(sizeof(double)*size);
374
416
  data2 = (double *)malloc(sizeof(double)*size);
375
417
  weight = (double *)malloc(sizeof(double)*size);
376
- mask = (int *)malloc(sizeof(int)*size);
418
+ mask1 = (int *)malloc(sizeof(int)*size);
419
+ mask2 = (int *)malloc(sizeof(int)*size);
377
420
 
378
421
  for (i = 0; i < size; i++) {
379
- mask[i] = 1;
380
422
  weight[i] = 1;
381
423
  data1[i] = NUM2DBL(rb_ary_entry(vec1, i));
382
424
  data2[i] = NUM2DBL(rb_ary_entry(vec2, i));
383
425
  }
384
426
 
385
- dist = fn(size, &data1, &data2, &mask, &mask, weight, 0, 0, 0);
386
- free(mask);
427
+ copy_mask(m1, mask1, size, 1);
428
+ copy_mask(m2, mask2, size, 1);
429
+
430
+ dist = fn(size, &data1, &data2, &mask1, &mask2, weight, 0, 0, 0);
431
+ free(mask1);
432
+ free(mask2);
387
433
  free(weight);
388
434
  free(data2);
389
435
  free(data1);
@@ -391,48 +437,155 @@ VALUE rb_distance(VALUE vec1, VALUE vec2, distance_fn fn) {
391
437
  return DBL2NUM(dist);
392
438
  }
393
439
 
394
- VALUE rb_euclid(VALUE self, VALUE vec1, VALUE vec2) {
395
- return rb_distance(vec1, vec2, euclid);
440
+ /*
441
+ Euclidian distance measure
442
+
443
+ @example
444
+ Flock.euclidian_distance([0, 0], [1, 1])
445
+ Flock.euclidian_distance([0, 0, 0], [1, 1, 1], [1, 1, 0], [1, 1, 0]) # with mask
446
+
447
+ @overload euclidian_distance(vector1, vector2, mask1 = identity, mask2 = identity)
448
+ @param [Array] vector1 Numeric vector
449
+ @param [Array] vector2 Numeric vector
450
+ @param [Array] mask1 Optional mask for vector1
451
+ @param [Array] mask2 Optional mask for vector2
452
+ */
453
+ VALUE rb_euclid(int argc, VALUE *argv, VALUE self) {
454
+ VALUE v1, v2, m1, m2;
455
+ rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
456
+ return rb_distance(v1, m1, v2, m2, euclid);
396
457
  }
397
458
 
398
- VALUE rb_cityblock(VALUE self, VALUE vec1, VALUE vec2) {
399
- return rb_distance(vec1, vec2, cityblock);
459
+ /*
460
+ Cityblock distance measure
461
+
462
+ @overload cityblock_distance(vector1, vector2, mask1 = identity, mask2 = identity)
463
+ @param [Array] vector1 Numeric vector
464
+ @param [Array] vector2 Numeric vector
465
+ @param [Array] mask1 Optional mask for vector1
466
+ @param [Array] mask2 Optional mask for vector2
467
+ */
468
+ VALUE rb_cityblock(int argc, VALUE *argv, VALUE self) {
469
+ VALUE v1, v2, m1, m2;
470
+ rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
471
+ return rb_distance(v1, m1, v2, m2, cityblock);
400
472
  }
401
473
 
402
- VALUE rb_correlation(VALUE self, VALUE vec1, VALUE vec2) {
403
- return rb_distance(vec1, vec2, correlation);
474
+ /*
475
+ Correlation distance measure
476
+
477
+ @overload correlation_distance(vector1, vector2, mask1 = identity, mask2 = identity)
478
+ @param [Array] vector1 Numeric vector
479
+ @param [Array] vector2 Numeric vector
480
+ @param [Array] mask1 Optional mask for vector1
481
+ @param [Array] mask2 Optional mask for vector2
482
+ */
483
+ VALUE rb_correlation(int argc, VALUE *argv, VALUE self) {
484
+ VALUE v1, v2, m1, m2;
485
+ rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
486
+ return rb_distance(v1, m1, v2, m2, correlation);
404
487
  }
405
488
 
406
- VALUE rb_ucorrelation(VALUE self, VALUE vec1, VALUE vec2) {
407
- return rb_distance(vec1, vec2, ucorrelation);
489
+ /*
490
+ Uncentered correlation distance measure
491
+
492
+ @overload uncentered_correlation_distance(vector1, vector2, mask1 = identity, mask2 = identity)
493
+ @param [Array] vector1 Numeric vector
494
+ @param [Array] vector2 Numeric vector
495
+ @param [Array] mask1 Optional mask for vector1
496
+ @param [Array] mask2 Optional mask for vector2
497
+ */
498
+ VALUE rb_ucorrelation(int argc, VALUE *argv, VALUE self) {
499
+ VALUE v1, v2, m1, m2;
500
+ rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
501
+ return rb_distance(v1, m1, v2, m2, ucorrelation);
408
502
  }
409
503
 
410
- VALUE rb_acorrelation(VALUE self, VALUE vec1, VALUE vec2) {
411
- return rb_distance(vec1, vec2, acorrelation);
504
+ /*
505
+ Absolute correlation distance measure
506
+
507
+ @overload absolute_correlation_distance(vector1, vector2, mask1 = identity, mask2 = identity)
508
+ @param [Array] vector1 Numeric vector
509
+ @param [Array] vector2 Numeric vector
510
+ @param [Array] mask1 Optional mask for vector1
511
+ @param [Array] mask2 Optional mask for vector2
512
+ */
513
+ VALUE rb_acorrelation(int argc, VALUE *argv, VALUE self) {
514
+ VALUE v1, v2, m1, m2;
515
+ rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
516
+ return rb_distance(v1, m1, v2, m2, acorrelation);
412
517
  }
413
518
 
414
- VALUE rb_uacorrelation(VALUE self, VALUE vec1, VALUE vec2) {
415
- return rb_distance(vec1, vec2, uacorrelation);
519
+ /*
520
+ Absolute uncentered correlation distance measure
521
+
522
+ @overload absolute_uncentered_correlation_distance(vector1, vector2, mask1 = identity, mask2 = identity)
523
+ @param [Array] vector1 Numeric vector
524
+ @param [Array] vector2 Numeric vector
525
+ @param [Array] mask1 Optional mask for vector1
526
+ @param [Array] mask2 Optional mask for vector2
527
+ */
528
+ VALUE rb_uacorrelation(int argc, VALUE *argv, VALUE self) {
529
+ VALUE v1, v2, m1, m2;
530
+ rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
531
+ return rb_distance(v1, m1, v2, m2, uacorrelation);
416
532
  }
417
533
 
418
- VALUE rb_spearman(VALUE self, VALUE vec1, VALUE vec2) {
419
- return rb_distance(vec1, vec2, spearman);
534
+ /*
535
+ Spearman distance measure
536
+
537
+ @overload spearman_distance(vector1, vector2, mask1 = identity, mask2 = identity)
538
+ @param [Array] vector1 Numeric vector
539
+ @param [Array] vector2 Numeric vector
540
+ @param [Array] mask1 Optional mask for vector1
541
+ @param [Array] mask2 Optional mask for vector2
542
+ */
543
+ VALUE rb_spearman(int argc, VALUE *argv, VALUE self) {
544
+ VALUE v1, v2, m1, m2;
545
+ rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
546
+ return rb_distance(v1, m1, v2, m2, spearman);
420
547
  }
421
548
 
422
- VALUE rb_kendall(VALUE self, VALUE vec1, VALUE vec2) {
423
- return rb_distance(vec1, vec2, kendall);
549
+ /*
550
+ Kendall distance measure
551
+
552
+ @overload kendall_distance(vector1, vector2, mask1 = identity, mask2 = identity)
553
+ @param [Array] vector1 Numeric vector
554
+ @param [Array] vector2 Numeric vector
555
+ @param [Array] mask1 Optional mask for vector1
556
+ @param [Array] mask2 Optional mask for vector2
557
+ */
558
+ VALUE rb_kendall(int argc, VALUE *argv, VALUE self) {
559
+ VALUE v1, v2, m1, m2;
560
+ rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
561
+ return rb_distance(v1, m1, v2, m2, kendall);
424
562
  }
425
563
 
426
564
 
427
565
  void Init_flock(void) {
428
- mFlock = rb_define_module("Flock");
429
- rb_define_module_function(mFlock, "kmeans", RUBY_METHOD_FUNC(rb_kmeans), -1);
430
- rb_define_module_function(mFlock, "self_organizing_map", RUBY_METHOD_FUNC(rb_som), -1);
431
- rb_define_module_function(mFlock, "treecluster", RUBY_METHOD_FUNC(rb_treecluster), -1);
566
+ mFlock = rb_define_module("Flock");
567
+ scFlock = rb_singleton_class(mFlock);
568
+
569
+ rb_define_private_method(scFlock, "do_kcluster", RUBY_METHOD_FUNC(rb_do_kcluster), -1);
570
+ rb_define_private_method(scFlock, "do_self_organizing_map", RUBY_METHOD_FUNC(rb_do_self_organizing_map), -1);
571
+ rb_define_private_method(scFlock, "do_treecluster", RUBY_METHOD_FUNC(rb_do_treecluster), -1);
432
572
 
573
+ /* kcluster method - K-Means */
433
574
  rb_define_const(mFlock, "METHOD_AVERAGE", INT2NUM('a'));
575
+
576
+ /* kcluster method - K-Medians */
434
577
  rb_define_const(mFlock, "METHOD_MEDIAN", INT2NUM('m'));
435
578
 
579
+ /* treecluster method - pairwise single-linkage clustering */
580
+ rb_define_const(mFlock, "METHOD_SINGLE_LINKAGE", INT2NUM('s'));
581
+ /* treecluster method - pairwise maximum- (or complete-) linkage clustering */
582
+ rb_define_const(mFlock, "METHOD_MAXIMUM_LINKAGE", INT2NUM('m'));
583
+ /* treecluster method - pairwise average-linkage clustering */
584
+ rb_define_const(mFlock, "METHOD_AVERAGE_LINKAGE", INT2NUM('a'));
585
+ /* treecluster method - pairwise centroid-linkage clustering */
586
+ rb_define_const(mFlock, "METHOD_CENTROID_LINKAGE", INT2NUM('c'));
587
+
588
+
436
589
  rb_define_const(mFlock, "METRIC_EUCLIDIAN", INT2NUM('e'));
437
590
  rb_define_const(mFlock, "METRIC_CITY_BLOCK", INT2NUM('b'));
438
591
  rb_define_const(mFlock, "METRIC_CORRELATION", INT2NUM('c'));
@@ -442,12 +595,27 @@ void Init_flock(void) {
442
595
  rb_define_const(mFlock, "METRIC_SPEARMAN", INT2NUM('s'));
443
596
  rb_define_const(mFlock, "METRIC_KENDALL", INT2NUM('k'));
444
597
 
445
- rb_define_module_function(mFlock, "euclidian_distance", RUBY_METHOD_FUNC(rb_euclid), 2);
446
- rb_define_module_function(mFlock, "cityblock_distance", RUBY_METHOD_FUNC(rb_cityblock), 2);
447
- rb_define_module_function(mFlock, "correlation_distance", RUBY_METHOD_FUNC(rb_correlation), 2);
448
- rb_define_module_function(mFlock, "absolute_correlation_distance", RUBY_METHOD_FUNC(rb_acorrelation), 2);
449
- rb_define_module_function(mFlock, "uncentered_correlation_distance", RUBY_METHOD_FUNC(rb_ucorrelation), 2);
450
- rb_define_module_function(mFlock, "absolute_uncentered_correlation_distance", RUBY_METHOD_FUNC(rb_uacorrelation), 2);
451
- rb_define_module_function(mFlock, "spearman_distance", RUBY_METHOD_FUNC(rb_spearman), 2);
452
- rb_define_module_function(mFlock, "kendall_distance", RUBY_METHOD_FUNC(rb_kendall), 2);
598
+ /* Randomly assign data points to clusters using a uniform distribution. */
599
+ rb_define_const(mFlock, "SEED_RANDOM", INT2NUM(0));
600
+
601
+ /*
602
+ K-Means++ style initialization where data points are probabilistically assigned to clusters
603
+ based on their distance from closest cluster.
604
+ */
605
+ rb_define_const(mFlock, "SEED_KMEANS_PLUSPLUS", INT2NUM(1));
606
+
607
+ /*
608
+ Deterministic cluster assignment by spreading out initial clusters as far away from each other
609
+ as possible.
610
+ */
611
+ rb_define_const(mFlock, "SEED_SPREADOUT", INT2NUM(2));
612
+
613
+ rb_define_module_function(mFlock, "euclidian_distance", RUBY_METHOD_FUNC(rb_euclid), -1);
614
+ rb_define_module_function(mFlock, "cityblock_distance", RUBY_METHOD_FUNC(rb_cityblock), -1);
615
+ rb_define_module_function(mFlock, "correlation_distance", RUBY_METHOD_FUNC(rb_correlation), -1);
616
+ rb_define_module_function(mFlock, "absolute_correlation_distance", RUBY_METHOD_FUNC(rb_acorrelation), -1);
617
+ rb_define_module_function(mFlock, "uncentered_correlation_distance", RUBY_METHOD_FUNC(rb_ucorrelation), -1);
618
+ rb_define_module_function(mFlock, "absolute_uncentered_correlation_distance", RUBY_METHOD_FUNC(rb_uacorrelation), -1);
619
+ rb_define_module_function(mFlock, "spearman_distance", RUBY_METHOD_FUNC(rb_spearman), -1);
620
+ rb_define_module_function(mFlock, "kendall_distance", RUBY_METHOD_FUNC(rb_kendall), -1);
453
621
  }