RubyGems - flock - Versions diffs - 0.2.1 → 0.3.0 - Mend

flock 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

data/README.rdoc CHANGED

@@ -76,17 +76,19 @@ Provides bindings to K-Means clustering in Cluster 3.0
   require 'flock'
   data = []
-  data << { apple:  1, orange: 1 }
-  data << { black:  1, white:  1 }
-  data << { white:  1, cyan:   1 }
-  data << { orange: 1 }
-  data << { apple:  1 }
+  # keys don't need to be numeric
+  data << { 1 => 0.5, 2 => 0.5 }
+  data << { 3 => 1, 4 => 1 }
+  data << { 4 => 1, 5 => 0.3 }
+  data << { 2 => 0.75 }
+  data << { 1 => 0.60 }
   pp Flock.sparse_kmeans(2, data)
-  # or even more simply (defaults to 1)
   data = []
+  # a much simpler way to cluster text
   data << %w(apple orange)
   data << %w(black white)
   data << %w(white cyan)

data/VERSION CHANGED

	@@ -1 +1 @@
1	- 0.2.1
1	+ 0.3.0

data/examples/som.rb ADDED

@@ -0,0 +1,13 @@
+#!/usr/bin/ruby
+# Example: place five small word lists on a 2x2 self-organizing map and
+# pretty-print the result returned by Flock.
+require 'pp'
+require 'flock'
+# One row per sample; each row lists the terms present in that sample.
+samples = [
+  %w(orange apple),
+  %w(black white),
+  %w(white cyan),
+  %w(orange),
+  %w(apple)
+]
+pp Flock.sparse_self_organizing_map(2, 2, samples)

data/examples/sparse.rb CHANGED

@@ -4,11 +4,11 @@ require 'pp'
 require 'flock'
 data = []
-data << { apple:  1, orange: 1 }
-data << { black:  1, white:  1 }
-data << { white:  1, cyan:   1 }
-data << { orange: 1 }
-data << { apple:  1 }
+data << { 1 => 0.5, 2 => 0.5 }
+data << { 3 => 1, 4 => 1 }
+data << { 4 => 1, 5 => 0.3 }
+data << { 2 => 0.75 }
+data << { 1 => 0.60 }
 pp Flock.sparse_kmeans(2, data)

data/ext/cluster.c CHANGED

@@ -5,7 +5,7 @@
  * Human Genome Center, Institute of Medical Science, University of Tokyo,
  * 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan.
  * Contact: mdehoon 'AT' gsc.riken.jp
- *
+ *
  * Permission to use, copy, modify, and distribute this software and its
  * documentation with or without modifications and for any purpose and
  * without fee is hereby granted, provided that any copyright notices
@@ -14,7 +14,7 @@
  * names of the contributors or copyright holders not be used in
  * advertising or publicity pertaining to distribution of the software
  * without specific prior permission.
- *
+ *
  * THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
  * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
@@ -23,7 +23,7 @@
  * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
  * OR PERFORMANCE OF THIS SOFTWARE.
- *
+ *
  */
 #include <time.h>
@@ -334,7 +334,7 @@ static int svd(int m, int n, double** u, double w[], double** vt)
  *   A=usv  of a real m by n rectangular matrix, where m is greater
  *   than or equal to n.  Householder bidiagonalization and a variant
  *   of the QR algorithm are used.
- *
+ *
  *
  *   On input.
  *
@@ -929,10 +929,9 @@ positive integer if the singular value decomposition fails to converge.
 /* ********************************************************************* */
-static
 double euclid (int n, double** data1, double** data2, int** mask1, int** mask2,
   const double weight[], int index1, int index2, int transpose)
 /*
 Purpose
 =======
@@ -1004,7 +1003,6 @@ Otherwise, the distance between two columns in the matrix is calculated.
 /* ********************************************************************* */
-static
 double cityblock (int n, double** data1, double** data2, int** mask1,
   int** mask2, const double weight[], int index1, int index2, int transpose)
@@ -1080,7 +1078,6 @@ Otherwise, the distance between two columns in the matrix is calculated.
 /* ********************************************************************* */
-static
 double correlation (int n, double** data1, double** data2, int** mask1,
   int** mask2, const double weight[], int index1, int index2, int transpose)
 /*
@@ -1180,7 +1177,6 @@ Otherwise, the distance between two columns in the matrix is calculated.
 /* ********************************************************************* */
-static
 double acorrelation (int n, double** data1, double** data2, int** mask1,
   int** mask2, const double weight[], int index1, int index2, int transpose)
 /*
@@ -1279,7 +1275,6 @@ Otherwise, the distance between two columns in the matrix is calculated.
 /* ********************************************************************* */
-static
 double ucorrelation (int n, double** data1, double** data2, int** mask1,
   int** mask2, const double weight[], int index1, int index2, int transpose)
 /*
@@ -1374,7 +1369,6 @@ Otherwise, the distance between two columns in the matrix is calculated.
 /* ********************************************************************* */
-static
 double uacorrelation (int n, double** data1, double** data2, int** mask1,
   int** mask2, const double weight[], int index1, int index2, int transpose)
 /*
@@ -1469,7 +1463,6 @@ Otherwise, the distance between two columns in the matrix is calculated.
 /* *********************************************************************  */
-static
 double spearman (int n, double** data1, double** data2, int** mask1,
   int** mask2, const double weight[], int index1, int index2, int transpose)
 /*
@@ -1597,7 +1590,6 @@ Otherwise, the distance between two columns in the matrix is calculated.
 /* *********************************************************************  */
-static
 double kendall (int n, double** data1, double** data2, int** mask1, int** mask2,
   const double weight[], int index1, int index2, int transpose)
 /*
@@ -1708,7 +1700,7 @@ Otherwise, the distance between two columns in the matrix is calculated.
 /* *********************************************************************  */
-static double(*setmetric(char dist))
+static double(*setmetric(char dist))
   (int, double**, double**, int**, int**, const double[], int, int, int)
 { switch(dist)
   { case 'e': return &euclid;
@@ -2203,7 +2195,7 @@ calculating the medians.
     }
   }
 }
 /* ********************************************************************* */
 int getclustercentroids(int nclusters, int nrows, int ncolumns,
@@ -2427,7 +2419,7 @@ kmeans(int nclusters, int nrows, int ncolumns, double** data, int** mask,
         break; /* Identical solution found; break out of this loop */
     }
-    if (npass<=1)
+    if (npass<=1)
     { *error = total;
       break;
     }
@@ -2532,7 +2524,7 @@ kmedians(int nclusters, int nrows, int ncolumns, double** data, int** mask,
         break; /* Identical solution found; break out of this loop */
     }
-    if (npass<=1)
+    if (npass<=1)
     { *error = total;
       break;
     }
@@ -2603,7 +2595,7 @@ of the matrix are clustered.
 npass      (input) int
 The number of times clustering is performed. Clustering is performed npass
-times, each time starting from a different (random) initial assignment of
+times, each time starting from a different (random) initial assignment of
 genes to clusters. The clustering solution with the lowest within-cluster sum
 of distances is chosen.
 If npass==0, then the clustering algorithm will be run once, where the initial
@@ -2697,7 +2689,7 @@ number of clusters is larger than the number of elements being clustered,
       return;
     }
   }
   if (method=='m')
   { double* cache = malloc(nelements*sizeof(double));
     if(cache)
@@ -3105,7 +3097,7 @@ weights array, the function returns NULL.
 /* ******************************************************************** */
-void cuttree (int nelements, Node* tree, int nclusters, int clusterid[])
+void cuttree (int nelements, Node* tree, int nclusters, int clusterid[])
 /*
 Purpose
@@ -3160,7 +3152,7 @@ error occured, all elements in clusterid are set to -1.
   }
   for (i = 0; i < n; i++) nodeid[i] = -1;
   for (i = n-1; i >= 0; i--)
-  { if(nodeid[i]<0)
+  { if(nodeid[i]<0)
     { j = icluster;
       nodeid[i] = j;
       icluster++;
@@ -3269,7 +3261,7 @@ If a memory error occurs, pclcluster returns NULL.
   if(!makedatamask(nelements, ndata, &newdata, &newmask))
   { free(result);
     free(distid);
-    return NULL;
+    return NULL;
   }
   for (i = 0; i < nelements; i++) distid[i] = i;
@@ -3313,7 +3305,7 @@ If a memory error occurs, pclcluster returns NULL.
     free(mask[is]);
     data[is] = data[nnodes-inode];
     mask[is] = mask[nnodes-inode];
     /* Fix the distances */
     distid[is] = distid[nnodes-inode];
     for (i = 0; i < is; i++)
@@ -3334,7 +3326,7 @@ If a memory error occurs, pclcluster returns NULL.
   free(data);
   free(mask);
   free(distid);
   return result;
 }
@@ -3829,7 +3821,7 @@ If a memory error occurs, treecluster returns NULL.
     for (i = 1; i < nelements; i++) free(distmatrix[i]);
     free (distmatrix);
   }
   return result;
 }
@@ -4037,7 +4029,7 @@ void somworker (int nrows, int ncolumns, double** data, int** mask,
 static
 void somassign (int nrows, int ncolumns, double** data, int** mask,
   const double weights[], int transpose, int nxgrid, int nygrid,
-  double*** celldata, char dist, int clusterid[][2])
+  double*** celldata, char dist, int **clusterid)
 /* Collect clusterids */
 { const int ndata = (transpose==0) ? ncolumns : nrows;
   int i,j;
@@ -4121,7 +4113,7 @@ void somassign (int nrows, int ncolumns, double** data, int** mask,
 void somcluster (int nrows, int ncolumns, double** data, int** mask,
   const double weight[], int transpose, int nxgrid, int nygrid,
-  double inittau, int niter, char dist, double*** celldata, int clusterid[][2])
+  double inittau, int niter, char dist, double*** celldata, int **clusterid)
 /*
 Purpose
@@ -4235,7 +4227,7 @@ somcluster.
 double clusterdistance (int nrows, int ncolumns, double** data,
   int** mask, double weight[], int n1, int n2, int index1[], int index2[],
   char dist, char method, int transpose)
 /*
 Purpose
 =======

data/ext/cluster.h CHANGED

@@ -6,7 +6,7 @@
  * Human Genome Center, Institute of Medical Science, University of Tokyo,
  * 4-6-1 Shirokanedai, Minato-ku, Tokyo 108-8639, Japan.
  * Contact: mdehoon 'AT' gsc.riken.jp
- *
+ *
  * Permission to use, copy, modify, and distribute this software and its
  * documentation with or without modifications and for any purpose and
  * without fee is hereby granted, provided that any copyright notices
@@ -15,7 +15,7 @@
  * names of the contributors or copyright holders not be used in
  * advertising or publicity pertaining to distribution of the software
  * without specific prior permission.
- *
+ *
  * THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
  * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
  * WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
@@ -24,7 +24,7 @@
  * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
  * OR PERFORMANCE OF THIS SOFTWARE.
- *
+ *
  */
 #ifndef min
@@ -79,7 +79,7 @@ void cuttree (int nelements, Node* tree, int nclusters, int clusterid[]);
 void somcluster (int nrows, int ncolumns, double** data, int** mask,
   const double weight[], int transpose, int nxnodes, int nynodes,
   double inittau, int niter, char dist, double*** celldata,
-  int clusterid[][2]);
+  int **clusterid);
 /* Chapter 6 */
 int pca(int m, int n, double** u, double** v, double* w);
@@ -91,3 +91,13 @@ double median (int n, double x[]);
 double* calculate_weights(int nrows, int ncolumns, double** data, int** mask,
   double weights[], int transpose, char dist, double cutoff, double exponent);
+/*
+ * Distance functions. They are defined in cluster.c without `static` so that
+ * code outside cluster.c can call them directly.
+ * All of them share one signature; the arguments are, in order:
+ *   n, data1, data2, mask1, mask2, weight, index1, index2, transpose
+ */
+extern double euclid (int, double**, double**, int**, int**, const double [], int, int, int);
+extern double cityblock(int, double**, double**, int**, int**, const double [], int, int, int);
+extern double correlation(int, double**, double**, int**, int**, const double [], int, int, int);
+extern double acorrelation(int, double**, double**, int**, int**, const double [], int, int, int);
+extern double ucorrelation(int, double**, double**, int**, int**, const double [], int, int, int);
+extern double uacorrelation(int, double**, double**, int**, int**, const double [], int, int, int);
+extern double spearman(int, double**, double**, int**, int**, const double [], int, int, int);
+extern double kendall(int, double**, double**, int**, int**, const double [], int, int, int);

data/ext/flock.c CHANGED

@@ -5,6 +5,7 @@
 #define CONST_GET(scope, constant) (rb_funcall(scope, ID_CONST_GET, 1, rb_str_new2(constant)))
 static VALUE mFlock;
+/* Pointer to one of the distance functions declared in cluster.h (euclid, cityblock, ...). */
+typedef double (*distance_fn)(int, double**, double**, int**, int**, const double [], int, int, int);
 int opt_int_value(VALUE option, char *key, int def) {
   if (NIL_P(option)) return def;
@@ -13,6 +14,13 @@ int opt_int_value(VALUE option, char *key, int def) {
   return NIL_P(value) ? def : NUM2INT(value);
 }
+/*
+ * Reads a floating point option from an options hash.
+ *
+ * option - Ruby hash of options, or nil
+ * key    - option name; looked up as a Symbol
+ * def    - value returned when `option` is nil or the key's value is nil
+ *
+ * Returns the option converted with NUM2DBL. The return type is double (it was
+ * int, which truncated fractional values such as tau = 0.02 to 0).
+ */
+double opt_double_value(VALUE option, char *key, double def) {
+  if (NIL_P(option)) return def;
+  VALUE value = rb_hash_aref(option, ID2SYM(rb_intern(key)));
+  return NIL_P(value) ? def : NUM2DBL(value);
+}
 VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
     VALUE size, data, mask, weights, options;
     rb_scan_args(argc, argv, "22", &size, &data, &mask, &options);
@@ -26,6 +34,20 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
     if (NIL_P(size) || NUM2INT(rb_Integer(size)) > RARRAY_LEN(data))
         rb_raise(rb_eArgError, "size should be > 0 and <= data size");
+    int transpose = opt_int_value(options, "transpose", 0);
+    int npass     = opt_int_value(options, "iterations", 1000);
+    // a = average (arithmetic mean), m = median
+    int method    = opt_int_value(options, "method", 'a');
+    // e = euclidian,
+    // b = city-block distance
+    // c = correlation
+    // a = absolute value of the correlation
+    // u = uncentered correlation
+    // x = absolute uncentered correlation
+    // s = spearman's rank correlation
+    // k = kendall's tau
+    int dist      = opt_int_value(options, "metric", 'e');
     int i,j;
     int nrows = RARRAY_LEN(data);
     int ncols = RARRAY_LEN(rb_ary_entry(data, 0));
@@ -33,16 +55,14 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
     double **cdata          = (double**)malloc(sizeof(double*)*nrows);
     int    **cmask          = (int   **)malloc(sizeof(int   *)*nrows);
-    double **ccentroid      = (double**)malloc(sizeof(double*)*nrows);
-    int    **ccentroid_mask = (int   **)malloc(sizeof(int   *)*nrows);
     double *cweights        = (double *)malloc(sizeof(double )*ncols);
-    int    *ccluster        = (int    *)malloc(sizeof(int    )*nrows);
+    double **ccentroid;
+    int *ccluster, **ccentroid_mask, dimx = nrows, dimy = ncols, cdimx = nsets, cdimy = ncols;
     for (i = 0; i < nrows; i++) {
         cdata[i]          = (double*)malloc(sizeof(double)*ncols);
         cmask[i]          = (int   *)malloc(sizeof(int   )*ncols);
-        ccentroid[i]      = (double*)malloc(sizeof(double)*ncols);
-        ccentroid_mask[i] = (int   *)malloc(sizeof(int   )*ncols);
         for (j = 0; j < ncols; j++) {
             cdata[i][j] = NUM2DBL(rb_Float(rb_ary_entry(rb_ary_entry(data, i), j)));
             cmask[i][j] = NIL_P(mask) ? 1 : NUM2INT(rb_Integer(rb_ary_entry(rb_ary_entry(mask, i), j)));
@@ -54,25 +74,27 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
         cweights[i] = NIL_P(weights) ? 1.0 : NUM2DBL(rb_Float(rb_ary_entry(weights, i)));
     }
-    int transpose = opt_int_value(options, "transpose", 0);
-    int npass     = opt_int_value(options, "iterations", 1000);
-    // a = average, m = means
-    int method    = opt_int_value(options, "method", 'a');
-    // e = euclidian,
-    // b = city-block distance
-    // c = correlation
-    // a = absolute value of the correlation
-    // u = uncentered correlation
-    // x = absolute uncentered correlation
-    // s = spearman's rank correlation
-    // k = kendall's tau
-    int dist      = opt_int_value(options, "metric", 'e');
+    if (transpose) {
+        dimx  = ncols;
+        dimy  = nrows;
+        cdimx = nrows;
+        cdimy = nsets;
+    }
+    ccluster       = (int    *)malloc(sizeof(int    )*dimx);
+    ccentroid      = (double**)malloc(sizeof(double*)*cdimx);
+    ccentroid_mask = (int   **)malloc(sizeof(int   *)*cdimx);
+    for (i = 0; i < cdimx; i++) {
+      ccentroid[i]      = (double*)malloc(sizeof(double)*cdimy);
+      ccentroid_mask[i] = (int   *)malloc(sizeof(int   )*cdimy);
+    }
     int    ifound;
     double error;
     kcluster(nsets,
         nrows, ncols, cdata, cmask, cweights, transpose, npass, method, dist, ccluster, &error, &ifound);
     getclustercentroids(nsets,
         nrows, ncols, cdata, cmask, ccluster, ccentroid, ccentroid_mask, transpose, method);
@@ -80,10 +102,12 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
     VALUE cluster  = rb_ary_new();
     VALUE centroid = rb_ary_new();
-    for (i = 0; i < nrows; i++) {
+    for (i = 0; i < dimx; i++)
         rb_ary_push(cluster, INT2NUM(ccluster[i]));
+    for (i = 0; i < cdimx; i++) {
         VALUE point = rb_ary_new();
-        for (j = 0; j < ncols; j++)
+        for (j = 0; j < cdimy; j++)
             rb_ary_push(point, DBL2NUM(ccentroid[i][j]));
         rb_ary_push(centroid, point);
     }
@@ -96,6 +120,9 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
     for (i = 0; i < nrows; i++) {
         free(cdata[i]);
         free(cmask[i]);
+    }
+    for (i = 0; i < cdimx; i++) {
         free(ccentroid[i]);
         free(ccentroid_mask[i]);
     }
@@ -110,9 +137,203 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
     return result;
 }
+/*
+ * Flock.self_organizing_map(nx, ny, data, mask = nil, options = nil)
+ *
+ * Clusters `data` (an array of equally sized numeric arrays) on an nx-by-ny
+ * grid by calling somcluster.
+ *
+ * mask    - optional array of arrays of integers with the same shape as data;
+ *           every entry is 1 when mask is nil.
+ * options - optional hash:
+ *             :transpose  (default 0)    passed to somcluster; when non-zero one
+ *                                         cluster id is returned per column
+ *                                         instead of per row
+ *             :iterations (default 1000) passed as somcluster's niter
+ *             :metric     (default 'e')  distance metric code (see below)
+ *             :tau        (default 1.0)  passed as somcluster's inittau
+ *             :weights                   array with one weight per vector
+ *                                         component (default 1.0 each)
+ *
+ * Returns a hash with
+ *   :cluster  - for every clustered element, the index of its cell in :centroid
+ *   :centroid - the nx * ny cell vectors, flattened with x as the slow index
+ *
+ * Raises ArgumentError when data or mask is not an array, data is empty, or
+ * nx / ny is not positive.
+ *
+ * NOTE(review): the numeric conversions below can raise; when they do, the
+ * buffers allocated so far are not released.
+ */
+VALUE rb_som(int argc, VALUE *argv, VALUE self) {
+    VALUE nx, ny, data, mask, weights, options;
+    rb_scan_args(argc, argv, "32", &nx, &ny, &data, &mask, &options);
+    if (TYPE(data) != T_ARRAY)
+        rb_raise(rb_eArgError, "data should be an array of arrays");
+    // rb_ary_entry(data, 0) is nil for an empty array and RARRAY_LEN(nil) is invalid
+    if (RARRAY_LEN(data) < 1 || TYPE(rb_ary_entry(data, 0)) != T_ARRAY)
+        rb_raise(rb_eArgError, "data should be a non-empty array of arrays");
+    if (!NIL_P(mask) && TYPE(mask) != T_ARRAY)
+        rb_raise(rb_eArgError, "mask should be an array of arrays");
+    if (NIL_P(nx) || NUM2INT(rb_Integer(nx)) <= 0)
+        rb_raise(rb_eArgError, "nx should be > 0");
+    if (NIL_P(ny) || NUM2INT(rb_Integer(ny)) <= 0)
+        rb_raise(rb_eArgError, "ny should be > 0");
+    int nxgrid    = NUM2INT(rb_Integer(nx));
+    int nygrid    = NUM2INT(rb_Integer(ny));
+    int transpose = opt_int_value(options, "transpose", 0);
+    int niter     = opt_int_value(options, "iterations", 1000);
+    // e = Euclidean distance
+    // b = city-block distance
+    // c = correlation
+    // a = absolute value of the correlation
+    // u = uncentered correlation
+    // x = absolute uncentered correlation
+    // s = spearman's rank correlation
+    // k = kendall's tau
+    int dist      = opt_int_value(options, "metric", 'e');
+    double tau    = opt_double_value(options, "tau", 1.0);
+    int i, j, k;
+    int nrows = RARRAY_LEN(data);
+    int ncols = RARRAY_LEN(rb_ary_entry(data, 0));
+    // dimx = number of elements being clustered, dimy = length of each element's vector
+    int dimx = transpose ? ncols : nrows;
+    int dimy = transpose ? nrows : ncols;
+    double **cdata      = (double**)malloc(sizeof(double*)*nrows);
+    int    **cmask      = (int   **)malloc(sizeof(int   *)*nrows);
+    // one weight per vector component, i.e. dimy entries (not always ncols)
+    double *cweights    = (double *)malloc(sizeof(double )*dimy);
+    // one (x, y) grid position per clustered element
+    int    **ccluster   = (int   **)malloc(sizeof(int   *)*dimx);
+    double ***ccelldata = (double***)malloc(sizeof(double**)*nxgrid);
+    for (i = 0; i < dimx; i++)
+        ccluster[i] = (int*)malloc(sizeof(int)*2);
+    for (i = 0; i < nrows; i++) {
+        cdata[i] = (double*)malloc(sizeof(double)*ncols);
+        cmask[i] = (int   *)malloc(sizeof(int   )*ncols);
+        for (j = 0; j < ncols; j++) {
+            cdata[i][j] = NUM2DBL(rb_Float(rb_ary_entry(rb_ary_entry(data, i), j)));
+            cmask[i][j] = NIL_P(mask) ? 1 : NUM2INT(rb_Integer(rb_ary_entry(rb_ary_entry(mask, i), j)));
+        }
+    }
+    weights = NIL_P(options) ? Qnil : rb_hash_aref(options, ID2SYM(rb_intern("weights")));
+    for (i = 0; i < dimy; i++)
+        cweights[i] = NIL_P(weights) ? 1.0 : NUM2DBL(rb_Float(rb_ary_entry(weights, i)));
+    for (i = 0; i < nxgrid; i++) {
+        ccelldata[i] = (double **)malloc(sizeof(double*)*nygrid);
+        for (j = 0; j < nygrid; j++)
+            ccelldata[i][j] = (double *)malloc(sizeof(double)*dimy);
+    }
+    somcluster(nrows, ncols, cdata, cmask, cweights, transpose,
+        nxgrid, nygrid, tau, niter, dist, ccelldata, ccluster);
+    VALUE result   = rb_hash_new();
+    VALUE cluster  = rb_ary_new();
+    VALUE centroid = rb_ary_new();
+    // Cells are pushed below in x-major order, so cell (x, y) sits at index
+    // x * nygrid + y of the centroid array; the cluster id must use the same stride.
+    for (i = 0; i < dimx; i++)
+        rb_ary_push(cluster, INT2NUM(ccluster[i][0] * nygrid + ccluster[i][1]));
+    for (i = 0; i < nxgrid; i++) {
+        for (j = 0; j < nygrid; j++) {
+            VALUE point = rb_ary_new();
+            for (k = 0; k < dimy; k++)
+                rb_ary_push(point, DBL2NUM(ccelldata[i][j][k]));
+            rb_ary_push(centroid, point);
+        }
+    }
+    rb_hash_aset(result, ID2SYM(rb_intern("cluster")),   cluster);
+    rb_hash_aset(result, ID2SYM(rb_intern("centroid")),  centroid);
+    for (i = 0; i < nrows; i++) {
+        free(cdata[i]);
+        free(cmask[i]);
+    }
+    for (i = 0; i < dimx; i++)
+        free(ccluster[i]);
+    for (i = 0; i < nxgrid; i++) {
+        for (j = 0; j < nygrid; j++)
+            free(ccelldata[i][j]);
+        free(ccelldata[i]);
+    }
+    free(cdata);
+    free(cmask);
+    free(ccelldata);
+    free(cweights);
+    free(ccluster);
+    return result;
+}
+/*
+ * Shared implementation of the Flock.*_distance module functions.
+ *
+ * vec1, vec2 - Ruby arrays of numbers with the same, non-zero length
+ * fn         - distance function to apply
+ *
+ * Each vector is handed to `fn` as a one-row matrix (row index 0, no
+ * transpose), with every mask entry and every weight set to 1.
+ *
+ * Returns the distance as a Ruby Float. Raises ArgumentError when an argument
+ * is not an array, the lengths differ, or the vectors are empty.
+ */
+VALUE rb_distance(VALUE vec1, VALUE vec2, distance_fn fn) {
+    long len;
+    int size, i;
+    double *data1, *data2, *weight, dist;
+    int *mask;
+    if (TYPE(vec1) != T_ARRAY)
+        rb_raise(rb_eArgError, "vector1 should be an array");
+    if (TYPE(vec2) != T_ARRAY)
+        rb_raise(rb_eArgError, "vector2 should be an array");
+    len = RARRAY_LEN(vec1);
+    if (len != RARRAY_LEN(vec2))
+        rb_raise(rb_eArgError, "vector1 & vector2 dimensions mismatch");
+    if (len < 1)
+        rb_raise(rb_eArgError, "dimension should be greater than 0");
+    // fn takes the dimension as an int; reject lengths that do not fit
+    size = (int)len;
+    if ((long)size != len)
+        rb_raise(rb_eArgError, "dimension is too large");
+    // Convert every element once before allocating, so that a non-numeric
+    // element raises while there is nothing to leak.
+    for (i = 0; i < size; i++) {
+        (void)NUM2DBL(rb_ary_entry(vec1, i));
+        (void)NUM2DBL(rb_ary_entry(vec2, i));
+    }
+    // ALLOC_N raises NoMemoryError instead of returning NULL
+    data1  = ALLOC_N(double, size);
+    data2  = ALLOC_N(double, size);
+    weight = ALLOC_N(double, size);
+    mask   = ALLOC_N(int, size);
+    for (i = 0; i < size; i++) {
+        mask[i]   = 1;
+        weight[i] = 1;
+        data1[i]  = NUM2DBL(rb_ary_entry(vec1, i));
+        data2[i]  = NUM2DBL(rb_ary_entry(vec2, i));
+    }
+    dist = fn(size, &data1, &data2, &mask, &mask, weight, 0, 0, 0);
+    xfree(mask);
+    xfree(weight);
+    xfree(data2);
+    xfree(data1);
+    return DBL2NUM(dist);
+}
+// Ruby-callable wrappers, one per distance function. Each rb_<metric>(self,
+// vec1, vec2) forwards both vectors to rb_distance together with the distance
+// function named <metric>; `self` is not used.
+#define FLOCK_DISTANCE_WRAPPER(metric) \
+    VALUE rb_##metric(VALUE self, VALUE vec1, VALUE vec2) { \
+        return rb_distance(vec1, vec2, metric); \
+    }
+FLOCK_DISTANCE_WRAPPER(euclid)
+FLOCK_DISTANCE_WRAPPER(cityblock)
+FLOCK_DISTANCE_WRAPPER(correlation)
+FLOCK_DISTANCE_WRAPPER(ucorrelation)
+FLOCK_DISTANCE_WRAPPER(acorrelation)
+FLOCK_DISTANCE_WRAPPER(uacorrelation)
+FLOCK_DISTANCE_WRAPPER(spearman)
+FLOCK_DISTANCE_WRAPPER(kendall)
+#undef FLOCK_DISTANCE_WRAPPER
 void Init_flock(void) {
     mFlock = rb_define_module("Flock");
     rb_define_module_function(mFlock, "kmeans", RUBY_METHOD_FUNC(rb_kmeans), -1);
+    rb_define_module_function(mFlock, "self_organizing_map", RUBY_METHOD_FUNC(rb_som), -1);
     rb_define_const(mFlock, "METHOD_AVERAGE", INT2NUM('a'));
     rb_define_const(mFlock, "METHOD_MEDIAN",  INT2NUM('m'));
@@ -125,4 +346,13 @@ void Init_flock(void) {
     rb_define_const(mFlock, "METRIC_ABSOLUTE_UNCENTERED_CORRELATION", INT2NUM('x'));
     rb_define_const(mFlock, "METRIC_SPEARMAN",                        INT2NUM('s'));
     rb_define_const(mFlock, "METRIC_KENDALL",                         INT2NUM('k'));
+    rb_define_module_function(mFlock, "euclidian_distance", RUBY_METHOD_FUNC(rb_euclid), 2);
+    rb_define_module_function(mFlock, "cityblock_distance", RUBY_METHOD_FUNC(rb_cityblock), 2);
+    rb_define_module_function(mFlock, "correlation_distance", RUBY_METHOD_FUNC(rb_correlation), 2);
+    rb_define_module_function(mFlock, "absolute_correlation_distance", RUBY_METHOD_FUNC(rb_acorrelation), 2);
+    rb_define_module_function(mFlock, "uncentered_correlation_distance", RUBY_METHOD_FUNC(rb_ucorrelation), 2);
+    rb_define_module_function(mFlock, "absolute_uncentered_correlation_distance", RUBY_METHOD_FUNC(rb_uacorrelation), 2);
+    rb_define_module_function(mFlock, "spearman_distance", RUBY_METHOD_FUNC(rb_spearman), 2);
+    rb_define_module_function(mFlock, "kendall_distance", RUBY_METHOD_FUNC(rb_kendall), 2);
 }

data/flock.gemspec CHANGED

@@ -5,11 +5,11 @@
 Gem::Specification.new do |s|
   s.name = %q{flock}
-  s.version = "0.2.1"
+  s.version = "0.3.0"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["Bharanee Rathna"]
-  s.date = %q{2011-02-19}
+  s.date = %q{2011-04-24}
   s.description = %q{A thin ruby binding to Cluster 3.0}
   s.email = ["deepfryed@gmail.com"]
   s.extensions = ["ext/extconf.rb"]
@@ -34,6 +34,7 @@ Gem::Specification.new do |s|
   s.summary = %q{Ruby bindings to Cluster 3.0.}
   s.test_files = [
     "examples/sparse.rb",
+     "examples/som.rb",
      "examples/dense.rb"
   ]

data/lib/flock.rb CHANGED

@@ -21,7 +21,7 @@ module Flock
     [dims,data]
   end
-  def self.sparse_kmeans size, sparse_data, options={}
+  def self.sparse_kmeans size, sparse_data, options = {}
     dims, data = sparse_data[0].kind_of?(Array) ? sparse_array_to_data(sparse_data) : sparse_hash_to_data(sparse_data)
     if options.key?(:weights)
@@ -32,4 +32,16 @@ module Flock
     kmeans(size, data, nil, options)
   end
+  # Runs self_organizing_map on sparse input.
+  #
+  # nx, ny      - grid dimensions, passed through to self_organizing_map
+  # sparse_data - array of samples; converted with sparse_array_to_data when
+  #               the first sample is an Array, otherwise with sparse_hash_to_data
+  # options     - passed through to self_organizing_map. When :weights is
+  #               given it is expected to map sample keys to weights; it is
+  #               expanded to one weight per dimension, defaulting to 1.
+  #
+  # The caller's options hash is left untouched: the expanded weights go into
+  # a merged copy.
+  def self.sparse_self_organizing_map nx, ny, sparse_data, options = {}
+    dims, data = sparse_data[0].kind_of?(Array) ? sparse_array_to_data(sparse_data) : sparse_hash_to_data(sparse_data)
+    if options.key?(:weights)
+      weights = Array.new(dims.size) {1}
+      options[:weights].each {|k,v| weights[dims[k]] = v }
+      options = options.merge(:weights => weights)
+    end
+    self_organizing_map(nx, ny, data, nil, options)
+  end
 end

metadata CHANGED

@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
   prerelease: false
   segments:
   - 0
-  - 2
-  - 1
-  version: 0.2.1
+  - 3
+  - 0
+  version: 0.3.0
 platform: ruby
 authors:
 - Bharanee Rathna
@@ -14,7 +14,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2011-02-19 00:00:00 +11:00
+date: 2011-04-24 00:00:00 +10:00
 default_executable:
 dependencies: []
@@ -38,6 +38,7 @@ files:
 - flock.gemspec
 - lib/flock.rb
 - examples/sparse.rb
+- examples/som.rb
 - examples/dense.rb
 has_rdoc: true
 homepage: http://github.com/deepfryed/flock
@@ -73,4 +74,5 @@ specification_version: 3
 summary: Ruby bindings to Cluster 3.0.
 test_files:
 - examples/sparse.rb
+- examples/som.rb
 - examples/dense.rb