flock 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -55,7 +55,7 @@ void getclustermedoids(int nclusters, int nelements, double** distance,
55
55
  int clusterid[], int centroids[], double errors[]);
56
56
  void kcluster (int nclusters, int ngenes, int ndata, double** data,
57
57
  int** mask, double weight[], int transpose, int npass, char method, char dist,
58
- int clusterid[], double* error, int* ifound);
58
+ int clusterid[], double* error, int* ifound, int assign);
59
59
  void kmedoids (int nclusters, int nelements, double** distance,
60
60
  int npass, int clusterid[], double* error, int* ifound);
61
61
 
@@ -3,41 +3,60 @@
3
3
 
4
4
  #define ID_CONST_GET rb_intern("const_get")
5
5
  #define CONST_GET(scope, constant) (rb_funcall(scope, ID_CONST_GET, 1, rb_str_new2(constant)))
6
+ #define DEFAULT_ITERATIONS 100
6
7
 
7
- static VALUE mFlock;
8
+ static VALUE mFlock, scFlock;
8
9
  typedef double (*distance_fn)(int, double**, double**, int**, int**, const double [], int, int, int);
9
10
 
10
- int opt_int_value(VALUE option, char *key, int def) {
11
- if (NIL_P(option)) return def;
11
+ int get_int_option(VALUE option, char *key, int default_value) {
12
+ if (NIL_P(option)) return default_value;
12
13
 
13
- VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
14
- return NIL_P(value) ? def : NUM2INT(value);
14
+ VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
15
+ return NIL_P(value) ? default_value : NUM2INT(value);
15
16
  }
16
17
 
17
- int opt_double_value(VALUE option, char *key, double def) {
18
- if (NIL_P(option)) return def;
18
+ int get_bool_option(VALUE option, char *key, int default_value) {
19
+ if (NIL_P(option)) return default_value;
20
+ VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
21
+ return (TYPE(value) == T_FALSE || TYPE(value) == T_NIL) ? 0 : 1;
22
+ }
23
+
24
+ double get_dbl_option(VALUE option, char *key, double default_value) {
25
+ if (NIL_P(option)) return default_value;
19
26
 
20
- VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
21
- return NIL_P(value) ? def : NUM2DBL(value);
27
+ VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
28
+ return NIL_P(value) ? default_value : NUM2DBL(value);
22
29
  }
23
30
 
24
- VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
31
+ VALUE get_value_option(VALUE option, char *key, VALUE default_value) {
32
+ if (NIL_P(option)) return default_value;
33
+
34
+ VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
35
+ return NIL_P(value) ? default_value : value;
36
+ }
37
+
38
+ /* @api private */
39
+ VALUE rb_do_kcluster(int argc, VALUE *argv, VALUE self) {
25
40
  VALUE size, data, mask, weights, options;
26
- rb_scan_args(argc, argv, "22", &size, &data, &mask, &options);
41
+ rb_scan_args(argc, argv, "21", &size, &data, &options);
27
42
 
28
43
  if (TYPE(data) != T_ARRAY)
29
44
  rb_raise(rb_eArgError, "data should be an array of arrays");
30
45
 
46
+ if (NIL_P(size) || NUM2INT(rb_Integer(size)) > RARRAY_LEN(data))
47
+ rb_raise(rb_eArgError, "size should be > 0 and <= data size");
48
+
49
+ mask = get_value_option(options, "mask", Qnil);
50
+
31
51
  if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
32
52
  rb_raise(rb_eArgError, "mask should be an array of arrays");
33
53
 
34
- if (NIL_P(size) || NUM2INT(rb_Integer(size)) > RARRAY_LEN(data))
35
- rb_raise(rb_eArgError, "size should be > 0 and <= data size");
54
+ int transpose = get_bool_option(options, "transpose", 0);
55
+ int npass = get_int_option(options, "iterations", DEFAULT_ITERATIONS);
36
56
 
37
- int transpose = opt_int_value(options, "transpose", 0);
38
- int npass = opt_int_value(options, "iterations", 1000);
39
57
  // a = average, m = means
40
- int method = opt_int_value(options, "method", 'a');
58
+ int method = get_int_option(options, "method", 'a');
59
+
41
60
  // e = euclidian,
42
61
  // b = city-block distance
43
62
  // c = correlation
@@ -46,7 +65,10 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
46
65
  // x = absolute uncentered correlation
47
66
  // s = spearman's rank correlation
48
67
  // k = kendall's tau
49
- int dist = opt_int_value(options, "metric", 'e');
68
+ int dist = get_int_option(options, "metric", 'e');
69
+
70
+ // initial assignment
71
+ int assign = get_int_option(options, "seed", 0);
50
72
 
51
73
  int i,j;
52
74
  int nrows = RARRAY_LEN(data);
@@ -94,7 +116,7 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
94
116
  double error;
95
117
 
96
118
  kcluster(nsets,
97
- nrows, ncols, cdata, cmask, cweights, transpose, npass, method, dist, ccluster, &error, &ifound);
119
+ nrows, ncols, cdata, cmask, cweights, transpose, npass, method, dist, ccluster, &error, &ifound, assign);
98
120
  getclustercentroids(nsets,
99
121
  nrows, ncols, cdata, cmask, ccluster, ccentroid, ccentroid_mask, transpose, method);
100
122
 
@@ -137,13 +159,16 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
137
159
  return result;
138
160
  }
139
161
 
140
- VALUE rb_som(int argc, VALUE *argv, VALUE self) {
162
+ /* @api private */
163
+ VALUE rb_do_self_organizing_map(int argc, VALUE *argv, VALUE self) {
141
164
  VALUE nx, ny, data, mask, weights, options;
142
- rb_scan_args(argc, argv, "32", &nx, &ny, &data, &mask, &options);
165
+ rb_scan_args(argc, argv, "31", &nx, &ny, &data, &options);
143
166
 
144
167
  if (TYPE(data) != T_ARRAY)
145
168
  rb_raise(rb_eArgError, "data should be an array of arrays");
146
169
 
170
+ mask = get_value_option(options, "mask", Qnil);
171
+
147
172
  if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
148
173
  rb_raise(rb_eArgError, "mask should be an array of arrays");
149
174
 
@@ -155,8 +180,8 @@ VALUE rb_som(int argc, VALUE *argv, VALUE self) {
155
180
 
156
181
  int nxgrid = NUM2INT(rb_Integer(nx));
157
182
  int nygrid = NUM2INT(rb_Integer(ny));
158
- int transpose = opt_int_value(options, "transpose", 0);
159
- int npass = opt_int_value(options, "iterations", 1000);
183
+ int transpose = get_int_option(options, "transpose", 0);
184
+ int npass = get_int_option(options, "iterations", DEFAULT_ITERATIONS);
160
185
 
161
186
  // e = euclidian,
162
187
  // b = city-block distance
@@ -166,8 +191,8 @@ VALUE rb_som(int argc, VALUE *argv, VALUE self) {
166
191
  // x = absolute uncentered correlation
167
192
  // s = spearman's rank correlation
168
193
  // k = kendall's tau
169
- int dist = opt_int_value(options, "metric", 'e');
170
- double tau = opt_double_value(options, "tau", 1.0);
194
+ int dist = get_int_option(options, "metric", 'e');
195
+ double tau = get_dbl_option(options, "tau", 1.0);
171
196
 
172
197
  int i, j, k;
173
198
  int nrows = RARRAY_LEN(data);
@@ -211,8 +236,7 @@ VALUE rb_som(int argc, VALUE *argv, VALUE self) {
211
236
  ccelldata[i][j] = (double *)malloc(sizeof(double)*dimy);
212
237
  }
213
238
 
214
- somcluster(nrows, ncols, cdata, cmask, cweights, transpose,
215
- nxgrid, nygrid, tau, npass, dist, ccelldata, ccluster);
239
+ somcluster(nrows, ncols, cdata, cmask, cweights, transpose, nxgrid, nygrid, tau, npass, dist, ccelldata, ccluster);
216
240
 
217
241
  VALUE result = rb_hash_new();
218
242
  VALUE cluster = rb_ary_new();
@@ -260,22 +284,30 @@ VALUE rb_som(int argc, VALUE *argv, VALUE self) {
260
284
  return result;
261
285
  }
262
286
 
263
- VALUE rb_treecluster(int argc, VALUE *argv, VALUE self) {
287
+ /* @api private */
288
+ VALUE rb_do_treecluster(int argc, VALUE *argv, VALUE self) {
264
289
  VALUE size, data, mask, weights, options;
265
- rb_scan_args(argc, argv, "22", &size, &data, &mask, &options);
290
+ rb_scan_args(argc, argv, "21", &size, &data, &options);
266
291
 
267
292
  if (TYPE(data) != T_ARRAY)
268
293
  rb_raise(rb_eArgError, "data should be an array of arrays");
269
294
 
295
+ mask = get_value_option(options, "mask", Qnil);
296
+
270
297
  if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
271
298
  rb_raise(rb_eArgError, "mask should be an array of arrays");
272
299
 
273
300
  if (NIL_P(size) || NUM2INT(rb_Integer(size)) > RARRAY_LEN(data))
274
301
  rb_raise(rb_eArgError, "size should be > 0 and <= data size");
275
302
 
276
- int transpose = opt_int_value(options, "transpose", 0);
277
- // a = average, m = means
278
- int method = opt_int_value(options, "method", 'a');
303
+ int transpose = get_int_option(options, "transpose", 0);
304
+
305
+ // s: pairwise single-linkage clustering
306
+ // m: pairwise maximum- (or complete-) linkage clustering
307
+ // a: pairwise average-linkage clustering
308
+ // c: pairwise centroid-linkage clustering
309
+ int method = get_int_option(options, "method", 'a');
310
+
279
311
  // e = euclidian,
280
312
  // b = city-block distance
281
313
  // c = correlation
@@ -284,7 +316,7 @@ VALUE rb_treecluster(int argc, VALUE *argv, VALUE self) {
284
316
  // x = absolute uncentered correlation
285
317
  // s = spearman's rank correlation
286
318
  // k = kendall's tau
287
- int dist = opt_int_value(options, "metric", 'e');
319
+ int dist = get_int_option(options, "metric", 'e');
288
320
 
289
321
  int i,j;
290
322
  int nrows = RARRAY_LEN(data);
@@ -346,15 +378,25 @@ VALUE rb_treecluster(int argc, VALUE *argv, VALUE self) {
346
378
  if (tree)
347
379
  free(tree);
348
380
  else
349
- rb_raise(rb_eNoMemError, "tree cluster ran out of memory");
381
+ rb_raise(rb_eNoMemError, "treecluster ran out of memory");
350
382
 
351
383
  return result;
352
384
  }
353
385
 
354
- VALUE rb_distance(VALUE vec1, VALUE vec2, distance_fn fn) {
386
+ void inline copy_mask(VALUE src, int *dst, int size, int def) {
387
+ int i;
388
+ if (NIL_P(src))
389
+ for (i = 0; i < size; i++)
390
+ dst[i] = def;
391
+ else
392
+ for (i = 0; i < size; i++)
393
+ dst[i] = NUM2INT(rb_ary_entry(src, i));
394
+ }
395
+
396
+ VALUE rb_distance(VALUE vec1, VALUE m1, VALUE vec2, VALUE m2, distance_fn fn) {
355
397
  uint32_t size;
356
398
  double *data1, *data2, *weight, dist;
357
- int *mask, i;
399
+ int *mask1, *mask2, i;
358
400
 
359
401
  if (TYPE(vec1) != T_ARRAY)
360
402
  rb_raise(rb_eArgError, "vector1 should be an array");
@@ -373,17 +415,21 @@ VALUE rb_distance(VALUE vec1, VALUE vec2, distance_fn fn) {
373
415
  data1 = (double *)malloc(sizeof(double)*size);
374
416
  data2 = (double *)malloc(sizeof(double)*size);
375
417
  weight = (double *)malloc(sizeof(double)*size);
376
- mask = (int *)malloc(sizeof(int)*size);
418
+ mask1 = (int *)malloc(sizeof(int)*size);
419
+ mask2 = (int *)malloc(sizeof(int)*size);
377
420
 
378
421
  for (i = 0; i < size; i++) {
379
- mask[i] = 1;
380
422
  weight[i] = 1;
381
423
  data1[i] = NUM2DBL(rb_ary_entry(vec1, i));
382
424
  data2[i] = NUM2DBL(rb_ary_entry(vec2, i));
383
425
  }
384
426
 
385
- dist = fn(size, &data1, &data2, &mask, &mask, weight, 0, 0, 0);
386
- free(mask);
427
+ copy_mask(m1, mask1, size, 1);
428
+ copy_mask(m2, mask2, size, 1);
429
+
430
+ dist = fn(size, &data1, &data2, &mask1, &mask2, weight, 0, 0, 0);
431
+ free(mask1);
432
+ free(mask2);
387
433
  free(weight);
388
434
  free(data2);
389
435
  free(data1);
@@ -391,48 +437,155 @@ VALUE rb_distance(VALUE vec1, VALUE vec2, distance_fn fn) {
391
437
  return DBL2NUM(dist);
392
438
  }
393
439
 
394
- VALUE rb_euclid(VALUE self, VALUE vec1, VALUE vec2) {
395
- return rb_distance(vec1, vec2, euclid);
440
+ /*
441
+ Euclidian distance measure
442
+
443
+ @example
444
+ Flock.euclidian_distance([0, 0], [1, 1])
445
+ Flock.euclidian_distance([0, 0, 0], [1, 1, 1], [1, 1, 0], [1, 1, 0]) # with mask
446
+
447
+ @overload euclidian_distance(vector1, vector2, mask1 = identity, mask2 = identity)
448
+ @param [Array] vector1 Numeric vector
449
+ @param [Array] vector2 Numeric vector
450
+ @param [Array] mask1 Optional mask for vector1
451
+ @param [Array] mask2 Optional mask for vector2
452
+ */
453
+ VALUE rb_euclid(int argc, VALUE *argv, VALUE self) {
454
+ VALUE v1, v2, m1, m2;
455
+ rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
456
+ return rb_distance(v1, m1, v2, m2, euclid);
396
457
  }
397
458
 
398
- VALUE rb_cityblock(VALUE self, VALUE vec1, VALUE vec2) {
399
- return rb_distance(vec1, vec2, cityblock);
459
+ /*
460
+ Cityblock distance measure
461
+
462
+ @overload cityblock_distance(vector1, vector2, mask1 = identity, mask2 = identity)
463
+ @param [Array] vector1 Numeric vector
464
+ @param [Array] vector2 Numeric vector
465
+ @param [Array] mask1 Optional mask for vector1
466
+ @param [Array] mask2 Optional mask for vector2
467
+ */
468
+ VALUE rb_cityblock(int argc, VALUE *argv, VALUE self) {
469
+ VALUE v1, v2, m1, m2;
470
+ rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
471
+ return rb_distance(v1, m1, v2, m2, cityblock);
400
472
  }
401
473
 
402
- VALUE rb_correlation(VALUE self, VALUE vec1, VALUE vec2) {
403
- return rb_distance(vec1, vec2, correlation);
474
+ /*
475
+ Correlation distance measure
476
+
477
+ @overload correlation_distance(vector1, vector2, mask1 = identity, mask2 = identity)
478
+ @param [Array] vector1 Numeric vector
479
+ @param [Array] vector2 Numeric vector
480
+ @param [Array] mask1 Optional mask for vector1
481
+ @param [Array] mask2 Optional mask for vector2
482
+ */
483
+ VALUE rb_correlation(int argc, VALUE *argv, VALUE self) {
484
+ VALUE v1, v2, m1, m2;
485
+ rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
486
+ return rb_distance(v1, m1, v2, m2, correlation);
404
487
  }
405
488
 
406
- VALUE rb_ucorrelation(VALUE self, VALUE vec1, VALUE vec2) {
407
- return rb_distance(vec1, vec2, ucorrelation);
489
+ /*
490
+ Uncentered correlation distance measure
491
+
492
+ @overload uncentered_correlation_distance(vector1, vector2, mask1 = identity, mask2 = identity)
493
+ @param [Array] vector1 Numeric vector
494
+ @param [Array] vector2 Numeric vector
495
+ @param [Array] mask1 Optional mask for vector1
496
+ @param [Array] mask2 Optional mask for vector2
497
+ */
498
+ VALUE rb_ucorrelation(int argc, VALUE *argv, VALUE self) {
499
+ VALUE v1, v2, m1, m2;
500
+ rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
501
+ return rb_distance(v1, m1, v2, m2, ucorrelation);
408
502
  }
409
503
 
410
- VALUE rb_acorrelation(VALUE self, VALUE vec1, VALUE vec2) {
411
- return rb_distance(vec1, vec2, acorrelation);
504
+ /*
505
+ Absolute correlation distance measure
506
+
507
+ @overload absolute_correlation_distance(vector1, vector2, mask1 = identity, mask2 = identity)
508
+ @param [Array] vector1 Numeric vector
509
+ @param [Array] vector2 Numeric vector
510
+ @param [Array] mask1 Optional mask for vector1
511
+ @param [Array] mask2 Optional mask for vector2
512
+ */
513
+ VALUE rb_acorrelation(int argc, VALUE *argv, VALUE self) {
514
+ VALUE v1, v2, m1, m2;
515
+ rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
516
+ return rb_distance(v1, m1, v2, m2, acorrelation);
412
517
  }
413
518
 
414
- VALUE rb_uacorrelation(VALUE self, VALUE vec1, VALUE vec2) {
415
- return rb_distance(vec1, vec2, uacorrelation);
519
+ /*
520
+ Absolute uncentered correlation distance measure
521
+
522
+ @overload absolute_uncentered_correlation_distance(vector1, vector2, mask1 = identity, mask2 = identity)
523
+ @param [Array] vector1 Numeric vector
524
+ @param [Array] vector2 Numeric vector
525
+ @param [Array] mask1 Optional mask for vector1
526
+ @param [Array] mask2 Optional mask for vector2
527
+ */
528
+ VALUE rb_uacorrelation(int argc, VALUE *argv, VALUE self) {
529
+ VALUE v1, v2, m1, m2;
530
+ rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
531
+ return rb_distance(v1, m1, v2, m2, uacorrelation);
416
532
  }
417
533
 
418
- VALUE rb_spearman(VALUE self, VALUE vec1, VALUE vec2) {
419
- return rb_distance(vec1, vec2, spearman);
534
+ /*
535
+ Spearman distance measure
536
+
537
+ @overload spearman_distance(vector1, vector2, mask1 = identity, mask2 = identity)
538
+ @param [Array] vector1 Numeric vector
539
+ @param [Array] vector2 Numeric vector
540
+ @param [Array] mask1 Optional mask for vector1
541
+ @param [Array] mask2 Optional mask for vector2
542
+ */
543
+ VALUE rb_spearman(int argc, VALUE *argv, VALUE self) {
544
+ VALUE v1, v2, m1, m2;
545
+ rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
546
+ return rb_distance(v1, m1, v2, m2, spearman);
420
547
  }
421
548
 
422
- VALUE rb_kendall(VALUE self, VALUE vec1, VALUE vec2) {
423
- return rb_distance(vec1, vec2, kendall);
549
+ /*
550
+ Kendall distance measure
551
+
552
+ @overload kendall_distance(vector1, vector2, mask1 = identity, mask2 = identity)
553
+ @param [Array] vector1 Numeric vector
554
+ @param [Array] vector2 Numeric vector
555
+ @param [Array] mask1 Optional mask for vector1
556
+ @param [Array] mask2 Optional mask for vector2
557
+ */
558
+ VALUE rb_kendall(int argc, VALUE *argv, VALUE self) {
559
+ VALUE v1, v2, m1, m2;
560
+ rb_scan_args(argc, argv, "22", &v1, &v2, &m1, &m2);
561
+ return rb_distance(v1, m1, v2, m2, kendall);
424
562
  }
425
563
 
426
564
 
427
565
  void Init_flock(void) {
428
- mFlock = rb_define_module("Flock");
429
- rb_define_module_function(mFlock, "kmeans", RUBY_METHOD_FUNC(rb_kmeans), -1);
430
- rb_define_module_function(mFlock, "self_organizing_map", RUBY_METHOD_FUNC(rb_som), -1);
431
- rb_define_module_function(mFlock, "treecluster", RUBY_METHOD_FUNC(rb_treecluster), -1);
566
+ mFlock = rb_define_module("Flock");
567
+ scFlock = rb_singleton_class(mFlock);
568
+
569
+ rb_define_private_method(scFlock, "do_kcluster", RUBY_METHOD_FUNC(rb_do_kcluster), -1);
570
+ rb_define_private_method(scFlock, "do_self_organizing_map", RUBY_METHOD_FUNC(rb_do_self_organizing_map), -1);
571
+ rb_define_private_method(scFlock, "do_treecluster", RUBY_METHOD_FUNC(rb_do_treecluster), -1);
432
572
 
573
+ /* kcluster method - K-Means */
433
574
  rb_define_const(mFlock, "METHOD_AVERAGE", INT2NUM('a'));
575
+
576
+ /* kcluster method - K-Medians */
434
577
  rb_define_const(mFlock, "METHOD_MEDIAN", INT2NUM('m'));
435
578
 
579
+ /* treecluster method - pairwise single-linkage clustering */
580
+ rb_define_const(mFlock, "METHOD_SINGLE_LINKAGE", INT2NUM('s'));
581
+ /* treecluster method - pairwise maximum- (or complete-) linkage clustering */
582
+ rb_define_const(mFlock, "METHOD_MAXIMUM_LINKAGE", INT2NUM('m'));
583
+ /* treecluster method - pairwise average-linkage clustering */
584
+ rb_define_const(mFlock, "METHOD_AVERAGE_LINKAGE", INT2NUM('a'));
585
+ /* treecluster method - pairwise centroid-linkage clustering */
586
+ rb_define_const(mFlock, "METHOD_CENTROID_LINKAGE", INT2NUM('c'));
587
+
588
+
436
589
  rb_define_const(mFlock, "METRIC_EUCLIDIAN", INT2NUM('e'));
437
590
  rb_define_const(mFlock, "METRIC_CITY_BLOCK", INT2NUM('b'));
438
591
  rb_define_const(mFlock, "METRIC_CORRELATION", INT2NUM('c'));
@@ -442,12 +595,27 @@ void Init_flock(void) {
442
595
  rb_define_const(mFlock, "METRIC_SPEARMAN", INT2NUM('s'));
443
596
  rb_define_const(mFlock, "METRIC_KENDALL", INT2NUM('k'));
444
597
 
445
- rb_define_module_function(mFlock, "euclidian_distance", RUBY_METHOD_FUNC(rb_euclid), 2);
446
- rb_define_module_function(mFlock, "cityblock_distance", RUBY_METHOD_FUNC(rb_cityblock), 2);
447
- rb_define_module_function(mFlock, "correlation_distance", RUBY_METHOD_FUNC(rb_correlation), 2);
448
- rb_define_module_function(mFlock, "absolute_correlation_distance", RUBY_METHOD_FUNC(rb_acorrelation), 2);
449
- rb_define_module_function(mFlock, "uncentered_correlation_distance", RUBY_METHOD_FUNC(rb_ucorrelation), 2);
450
- rb_define_module_function(mFlock, "absolute_uncentered_correlation_distance", RUBY_METHOD_FUNC(rb_uacorrelation), 2);
451
- rb_define_module_function(mFlock, "spearman_distance", RUBY_METHOD_FUNC(rb_spearman), 2);
452
- rb_define_module_function(mFlock, "kendall_distance", RUBY_METHOD_FUNC(rb_kendall), 2);
598
+ /* Randomly assign data points to clusters using a uniform distribution. */
599
+ rb_define_const(mFlock, "SEED_RANDOM", INT2NUM(0));
600
+
601
+ /*
602
+ K-Means++ style initialization where data points are probabilistically assigned to clusters
603
+ based on their distance from closest cluster.
604
+ */
605
+ rb_define_const(mFlock, "SEED_KMEANS_PLUSPLUS", INT2NUM(1));
606
+
607
+ /*
608
+ Deterministic cluster assignment by spreading out initial clusters as far away from each other
609
+ as possible.
610
+ */
611
+ rb_define_const(mFlock, "SEED_SPREADOUT", INT2NUM(2));
612
+
613
+ rb_define_module_function(mFlock, "euclidian_distance", RUBY_METHOD_FUNC(rb_euclid), -1);
614
+ rb_define_module_function(mFlock, "cityblock_distance", RUBY_METHOD_FUNC(rb_cityblock), -1);
615
+ rb_define_module_function(mFlock, "correlation_distance", RUBY_METHOD_FUNC(rb_correlation), -1);
616
+ rb_define_module_function(mFlock, "absolute_correlation_distance", RUBY_METHOD_FUNC(rb_acorrelation), -1);
617
+ rb_define_module_function(mFlock, "uncentered_correlation_distance", RUBY_METHOD_FUNC(rb_ucorrelation), -1);
618
+ rb_define_module_function(mFlock, "absolute_uncentered_correlation_distance", RUBY_METHOD_FUNC(rb_uacorrelation), -1);
619
+ rb_define_module_function(mFlock, "spearman_distance", RUBY_METHOD_FUNC(rb_spearman), -1);
620
+ rb_define_module_function(mFlock, "kendall_distance", RUBY_METHOD_FUNC(rb_kendall), -1);
453
621
  }