RubyGems - alglib - Versions diffs - 0.1.1 - Mend

alglib 0.1.1

Files changed (255) hide show

data/History.txt +7 -0
data/Manifest.txt +253 -0
data/README.txt +33 -0
data/Rakefile +27 -0
data/ext/Rakefile +24 -0
data/ext/alglib.i +24 -0
data/ext/alglib/Makefile +157 -0
data/ext/alglib/airyf.cpp +372 -0
data/ext/alglib/airyf.h +81 -0
data/ext/alglib/alglib.cpp +8558 -0
data/ext/alglib/alglib_util.cpp +19 -0
data/ext/alglib/alglib_util.h +14 -0
data/ext/alglib/ap.cpp +877 -0
data/ext/alglib/ap.english.html +364 -0
data/ext/alglib/ap.h +666 -0
data/ext/alglib/ap.russian.html +442 -0
data/ext/alglib/apvt.h +754 -0
data/ext/alglib/bdss.cpp +1500 -0
data/ext/alglib/bdss.h +251 -0
data/ext/alglib/bdsvd.cpp +1339 -0
data/ext/alglib/bdsvd.h +164 -0
data/ext/alglib/bessel.cpp +1226 -0
data/ext/alglib/bessel.h +331 -0
data/ext/alglib/betaf.cpp +105 -0
data/ext/alglib/betaf.h +74 -0
data/ext/alglib/bidiagonal.cpp +1328 -0
data/ext/alglib/bidiagonal.h +350 -0
data/ext/alglib/binomialdistr.cpp +247 -0
data/ext/alglib/binomialdistr.h +153 -0
data/ext/alglib/blas.cpp +576 -0
data/ext/alglib/blas.h +132 -0
data/ext/alglib/cblas.cpp +226 -0
data/ext/alglib/cblas.h +57 -0
data/ext/alglib/cdet.cpp +138 -0
data/ext/alglib/cdet.h +92 -0
data/ext/alglib/chebyshev.cpp +216 -0
data/ext/alglib/chebyshev.h +76 -0
data/ext/alglib/chisquaredistr.cpp +157 -0
data/ext/alglib/chisquaredistr.h +144 -0
data/ext/alglib/cholesky.cpp +285 -0
data/ext/alglib/cholesky.h +86 -0
data/ext/alglib/cinverse.cpp +298 -0
data/ext/alglib/cinverse.h +111 -0
data/ext/alglib/clu.cpp +337 -0
data/ext/alglib/clu.h +120 -0
data/ext/alglib/correlation.cpp +280 -0
data/ext/alglib/correlation.h +77 -0
data/ext/alglib/correlationtests.cpp +726 -0
data/ext/alglib/correlationtests.h +134 -0
data/ext/alglib/crcond.cpp +826 -0
data/ext/alglib/crcond.h +148 -0
data/ext/alglib/creflections.cpp +310 -0
data/ext/alglib/creflections.h +165 -0
data/ext/alglib/csolve.cpp +312 -0
data/ext/alglib/csolve.h +99 -0
data/ext/alglib/ctrinverse.cpp +387 -0
data/ext/alglib/ctrinverse.h +98 -0
data/ext/alglib/ctrlinsolve.cpp +297 -0
data/ext/alglib/ctrlinsolve.h +81 -0
data/ext/alglib/dawson.cpp +234 -0
data/ext/alglib/dawson.h +74 -0
data/ext/alglib/descriptivestatistics.cpp +436 -0
data/ext/alglib/descriptivestatistics.h +112 -0
data/ext/alglib/det.cpp +140 -0
data/ext/alglib/det.h +94 -0
data/ext/alglib/dforest.cpp +1819 -0
data/ext/alglib/dforest.h +316 -0
data/ext/alglib/elliptic.cpp +497 -0
data/ext/alglib/elliptic.h +217 -0
data/ext/alglib/estnorm.cpp +429 -0
data/ext/alglib/estnorm.h +107 -0
data/ext/alglib/expintegrals.cpp +422 -0
data/ext/alglib/expintegrals.h +108 -0
data/ext/alglib/faq.english.html +258 -0
data/ext/alglib/faq.russian.html +272 -0
data/ext/alglib/fdistr.cpp +202 -0
data/ext/alglib/fdistr.h +163 -0
data/ext/alglib/fresnel.cpp +211 -0
data/ext/alglib/fresnel.h +91 -0
data/ext/alglib/gammaf.cpp +338 -0
data/ext/alglib/gammaf.h +104 -0
data/ext/alglib/gqgengauss.cpp +235 -0
data/ext/alglib/gqgengauss.h +92 -0
data/ext/alglib/gqgenhermite.cpp +268 -0
data/ext/alglib/gqgenhermite.h +63 -0
data/ext/alglib/gqgenjacobi.cpp +297 -0
data/ext/alglib/gqgenjacobi.h +72 -0
data/ext/alglib/gqgenlaguerre.cpp +265 -0
data/ext/alglib/gqgenlaguerre.h +69 -0
data/ext/alglib/gqgenlegendre.cpp +300 -0
data/ext/alglib/gqgenlegendre.h +62 -0
data/ext/alglib/gqgenlobatto.cpp +305 -0
data/ext/alglib/gqgenlobatto.h +97 -0
data/ext/alglib/gqgenradau.cpp +232 -0
data/ext/alglib/gqgenradau.h +95 -0
data/ext/alglib/hbisinv.cpp +480 -0
data/ext/alglib/hbisinv.h +183 -0
data/ext/alglib/hblas.cpp +228 -0
data/ext/alglib/hblas.h +64 -0
data/ext/alglib/hcholesky.cpp +339 -0
data/ext/alglib/hcholesky.h +91 -0
data/ext/alglib/hermite.cpp +114 -0
data/ext/alglib/hermite.h +49 -0
data/ext/alglib/hessenberg.cpp +370 -0
data/ext/alglib/hessenberg.h +152 -0
data/ext/alglib/hevd.cpp +247 -0
data/ext/alglib/hevd.h +107 -0
data/ext/alglib/hsschur.cpp +1316 -0
data/ext/alglib/hsschur.h +108 -0
data/ext/alglib/htridiagonal.cpp +734 -0
data/ext/alglib/htridiagonal.h +180 -0
data/ext/alglib/ialglib.cpp +6 -0
data/ext/alglib/ialglib.h +9 -0
data/ext/alglib/ibetaf.cpp +960 -0
data/ext/alglib/ibetaf.h +125 -0
data/ext/alglib/igammaf.cpp +430 -0
data/ext/alglib/igammaf.h +157 -0
data/ext/alglib/inv.cpp +274 -0
data/ext/alglib/inv.h +115 -0
data/ext/alglib/inverseupdate.cpp +480 -0
data/ext/alglib/inverseupdate.h +185 -0
data/ext/alglib/jacobianelliptic.cpp +164 -0
data/ext/alglib/jacobianelliptic.h +94 -0
data/ext/alglib/jarquebera.cpp +2271 -0
data/ext/alglib/jarquebera.h +80 -0
data/ext/alglib/kmeans.cpp +356 -0
data/ext/alglib/kmeans.h +76 -0
data/ext/alglib/laguerre.cpp +94 -0
data/ext/alglib/laguerre.h +48 -0
data/ext/alglib/lbfgs.cpp +1167 -0
data/ext/alglib/lbfgs.h +218 -0
data/ext/alglib/lda.cpp +434 -0
data/ext/alglib/lda.h +133 -0
data/ext/alglib/ldlt.cpp +1130 -0
data/ext/alglib/ldlt.h +124 -0
data/ext/alglib/leastsquares.cpp +1252 -0
data/ext/alglib/leastsquares.h +290 -0
data/ext/alglib/legendre.cpp +107 -0
data/ext/alglib/legendre.h +49 -0
data/ext/alglib/linreg.cpp +1185 -0
data/ext/alglib/linreg.h +380 -0
data/ext/alglib/logit.cpp +1523 -0
data/ext/alglib/logit.h +333 -0
data/ext/alglib/lq.cpp +399 -0
data/ext/alglib/lq.h +160 -0
data/ext/alglib/lu.cpp +462 -0
data/ext/alglib/lu.h +119 -0
data/ext/alglib/mannwhitneyu.cpp +4490 -0
data/ext/alglib/mannwhitneyu.h +115 -0
data/ext/alglib/minlm.cpp +918 -0
data/ext/alglib/minlm.h +312 -0
data/ext/alglib/mlpbase.cpp +3375 -0
data/ext/alglib/mlpbase.h +589 -0
data/ext/alglib/mlpe.cpp +1369 -0
data/ext/alglib/mlpe.h +552 -0
data/ext/alglib/mlptrain.cpp +1056 -0
data/ext/alglib/mlptrain.h +283 -0
data/ext/alglib/nearunityunit.cpp +91 -0
data/ext/alglib/nearunityunit.h +17 -0
data/ext/alglib/normaldistr.cpp +377 -0
data/ext/alglib/normaldistr.h +175 -0
data/ext/alglib/nsevd.cpp +1869 -0
data/ext/alglib/nsevd.h +140 -0
data/ext/alglib/pca.cpp +168 -0
data/ext/alglib/pca.h +87 -0
data/ext/alglib/poissondistr.cpp +143 -0
data/ext/alglib/poissondistr.h +130 -0
data/ext/alglib/polinterpolation.cpp +685 -0
data/ext/alglib/polinterpolation.h +206 -0
data/ext/alglib/psif.cpp +173 -0
data/ext/alglib/psif.h +88 -0
data/ext/alglib/qr.cpp +414 -0
data/ext/alglib/qr.h +168 -0
data/ext/alglib/ratinterpolation.cpp +134 -0
data/ext/alglib/ratinterpolation.h +72 -0
data/ext/alglib/rcond.cpp +705 -0
data/ext/alglib/rcond.h +140 -0
data/ext/alglib/reflections.cpp +504 -0
data/ext/alglib/reflections.h +165 -0
data/ext/alglib/rotations.cpp +473 -0
data/ext/alglib/rotations.h +128 -0
data/ext/alglib/rsolve.cpp +221 -0
data/ext/alglib/rsolve.h +99 -0
data/ext/alglib/sbisinv.cpp +217 -0
data/ext/alglib/sbisinv.h +171 -0
data/ext/alglib/sblas.cpp +185 -0
data/ext/alglib/sblas.h +64 -0
data/ext/alglib/schur.cpp +156 -0
data/ext/alglib/schur.h +102 -0
data/ext/alglib/sdet.cpp +193 -0
data/ext/alglib/sdet.h +101 -0
data/ext/alglib/sevd.cpp +116 -0
data/ext/alglib/sevd.h +99 -0
data/ext/alglib/sinverse.cpp +672 -0
data/ext/alglib/sinverse.h +138 -0
data/ext/alglib/spddet.cpp +138 -0
data/ext/alglib/spddet.h +96 -0
data/ext/alglib/spdgevd.cpp +842 -0
data/ext/alglib/spdgevd.h +200 -0
data/ext/alglib/spdinverse.cpp +509 -0
data/ext/alglib/spdinverse.h +122 -0
data/ext/alglib/spdrcond.cpp +421 -0
data/ext/alglib/spdrcond.h +118 -0
data/ext/alglib/spdsolve.cpp +275 -0
data/ext/alglib/spdsolve.h +105 -0
data/ext/alglib/spline2d.cpp +1192 -0
data/ext/alglib/spline2d.h +301 -0
data/ext/alglib/spline3.cpp +1264 -0
data/ext/alglib/spline3.h +290 -0
data/ext/alglib/srcond.cpp +595 -0
data/ext/alglib/srcond.h +127 -0
data/ext/alglib/ssolve.cpp +895 -0
data/ext/alglib/ssolve.h +139 -0
data/ext/alglib/stdafx.h +0 -0
data/ext/alglib/stest.cpp +131 -0
data/ext/alglib/stest.h +94 -0
data/ext/alglib/studenttdistr.cpp +222 -0
data/ext/alglib/studenttdistr.h +115 -0
data/ext/alglib/studentttests.cpp +377 -0
data/ext/alglib/studentttests.h +178 -0
data/ext/alglib/svd.cpp +620 -0
data/ext/alglib/svd.h +126 -0
data/ext/alglib/tdbisinv.cpp +2608 -0
data/ext/alglib/tdbisinv.h +228 -0
data/ext/alglib/tdevd.cpp +1229 -0
data/ext/alglib/tdevd.h +115 -0
data/ext/alglib/tridiagonal.cpp +594 -0
data/ext/alglib/tridiagonal.h +171 -0
data/ext/alglib/trigintegrals.cpp +490 -0
data/ext/alglib/trigintegrals.h +131 -0
data/ext/alglib/trinverse.cpp +345 -0
data/ext/alglib/trinverse.h +98 -0
data/ext/alglib/trlinsolve.cpp +926 -0
data/ext/alglib/trlinsolve.h +73 -0
data/ext/alglib/tsort.cpp +405 -0
data/ext/alglib/tsort.h +54 -0
data/ext/alglib/variancetests.cpp +245 -0
data/ext/alglib/variancetests.h +134 -0
data/ext/alglib/wsr.cpp +6285 -0
data/ext/alglib/wsr.h +96 -0
data/ext/ap.i +97 -0
data/ext/correlation.i +24 -0
data/ext/extconf.rb +6 -0
data/ext/logit.i +89 -0
data/lib/alglib.rb +71 -0
data/lib/alglib/correlation.rb +26 -0
data/lib/alglib/linearregression.rb +63 -0
data/lib/alglib/logit.rb +42 -0
data/test/test_alglib.rb +52 -0
data/test/test_correlation.rb +44 -0
data/test/test_correlationtest.rb +45 -0
data/test/test_linreg.rb +35 -0
data/test/test_logit.rb +43 -0
data/test/test_pca.rb +27 -0
metadata +326 -0

@@ -0,0 +1,80 @@
+/*************************************************************************
+Copyright (c) 2007, Sergey Bochkanov (ALGLIB project).
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+- Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer listed
+  in this license in the documentation and/or other materials
+  provided with the distribution.
+- Neither the name of the copyright holders nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************************/
+#ifndef _jarquebera_h
+#define _jarquebera_h
+#include "ap.h"
+#include "ialglib.h"
+/*************************************************************************
+Jarque-Bera test
+Tests the hypothesis that the sample X was drawn from a normally
+distributed random variable.
+Requirements:
+    * the sample must contain at least 5 elements.
+Input parameters:
+    X   -   sample. Array whose index goes from 0 to N-1.
+    N   -   size of the sample. N>=5
+Output parameters:
+    P   -   p-value of the test.
+            NOTE(review): the declaration below exposes this single output
+            only, whereas this comment used to list three p-values
+            (BothTails, LeftTail, RightTail), each one rejecting the null
+            hypothesis when it is less than the given significance level.
+            Presumably the same rejection rule applies to P - confirm
+            against jarquebera.cpp.
+Accuracy of the approximation used (5<=N<=1951):
+p-value             relative error (5<=N<=1951)
+[1, 0.1]            < 1%
+[0.1, 0.01]         < 2%
+[0.01, 0.001]       < 6%
+[0.001, 0]          wasn't measured
+For N>1951 the accuracy wasn't measured, but it shouldn't differ sharply
+from the table values.
+  -- ALGLIB --
+     Copyright 09.04.2007 by Bochkanov Sergey
+*************************************************************************/
+void jarqueberatest(const ap::real_1d_array& x,
+     int n,
+     double& p);
+#endif

data/ext/alglib/kmeans.cpp ADDED

@@ -0,0 +1,356 @@
+/*************************************************************************
+Copyright (c) 2008, Sergey Bochkanov (ALGLIB project).
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+- Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer listed
+  in this license in the documentation and/or other materials
+  provided with the distribution.
+- Neither the name of the copyright holders nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************************/
+#include <stdafx.h>
+#include "kmeans.h"
+//
+// Forward declaration of the file-local k-means++ seeding helper
+// (defined at the end of this file).
+//
+static bool selectcenterpp(const ap::real_2d_array& xy, int npoints, int nvars,
+     ap::real_2d_array& centers, ap::boolean_1d_array busycenters, int ccnt,
+     ap::real_1d_array& d2, ap::real_1d_array& p, ap::real_1d_array& tmp);
+/*************************************************************************
+k-means++ clusterization
+Runs the k-means++ seeding followed by iterative center refinement
+Restarts times and returns the partition with the smallest total squared
+distance between the points and their cluster centers.
+INPUT PARAMETERS:
+    XY          -   dataset, array [0..NPoints-1,0..NVars-1].
+    NPoints     -   dataset size, NPoints>=K
+    NVars       -   number of variables, NVars>=1
+    K           -   desired number of clusters, K>=1
+    Restarts    -   number of restarts, Restarts>=1
+OUTPUT PARAMETERS:
+    Info        -   return code:
+                    * -3, if task is degenerate (number of distinct points is
+                          less than K)
+                    * -1, if incorrect NPoints/NVars/K/Restarts was passed
+                    *  1, if subroutine finished successfully
+    C           -   array[0..NVars-1,0..K-1], matrix whose columns store
+                    cluster's centers (set only when Info=1)
+    XYC         -   array[0..NPoints-1], XYC[i] is the index (0..K-1) of the
+                    cluster which the i-th dataset point belongs to
+                    (meaningful only when Info=1)
+  -- ALGLIB --
+     Copyright 21.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void kmeansgenerate(const ap::real_2d_array& xy,
+     int npoints,
+     int nvars,
+     int k,
+     int restarts,
+     int& info,
+     ap::real_2d_array& c,
+     ap::integer_1d_array& xyc)
+{
+    int i;
+    int j;
+    ap::real_2d_array ct;
+    ap::real_2d_array ctbest;
+    ap::integer_1d_array xycbest;
+    double e;
+    double ebest;
+    ap::real_1d_array tmp;
+    ap::real_1d_array d2;
+    ap::real_1d_array p;
+    ap::integer_1d_array csizes;
+    ap::boolean_1d_array cbusy;
+    double v;
+    int cclosest;
+    double dclosest;
+    bool waschanges;
+    bool zerosizeclusters;
+    int pass;
+    //
+    // Test parameters
+    //
+    if( npoints<k||nvars<1||k<1||restarts<1 )
+    {
+        info = -1;
+        return;
+    }
+    //
+    // TODO: special case K=1
+    // TODO: special case K=NPoints
+    //
+    info = 1;
+    //
+    // Multiple passes of k-means++ algorithm
+    //
+    ct.setbounds(0, k-1, 0, nvars-1);
+    ctbest.setbounds(0, k-1, 0, nvars-1);
+    xyc.setbounds(0, npoints-1);
+    xycbest.setbounds(0, npoints-1);
+    d2.setbounds(0, npoints-1);
+    p.setbounds(0, npoints-1);
+    tmp.setbounds(0, nvars-1);
+    csizes.setbounds(0, k-1);
+    cbusy.setbounds(0, k-1);
+    ebest = ap::maxrealnumber;
+    for(pass = 1; pass <= restarts; pass++)
+    {
+        //
+        // Select initial centers  using k-means++ algorithm
+        // 1. Choose first center at random
+        // 2. Choose next centers using their distance from centers already chosen
+        //
+        // Note that for performance reasons centers are stored in ROWS of CT, not
+        // in columns. We'll transpose CT in the end and store it in the C.
+        //
+        i = ap::randominteger(npoints);
+        ap::vmove(&ct(0, 0), &xy(i, 0), ap::vlen(0,nvars-1));
+        cbusy(0) = true;
+        for(i = 1; i <= k-1; i++)
+        {
+            cbusy(i) = false;
+        }
+        if( !selectcenterpp(xy, npoints, nvars, ct, cbusy, k, d2, p, tmp) )
+        {
+            info = -3;
+            return;
+        }
+        //
+        // Reset labels so that the first iteration of every restart
+        // always registers a change: XYC is uninitialized on the first
+        // pass and holds labels of the previous restart afterwards.
+        //
+        for(i = 0; i <= npoints-1; i++)
+        {
+            xyc(i) = -1;
+        }
+        //
+        // Update centers:
+        // 1. assign every point to the closest center
+        // 2. update center positions
+        // Repeat until assignment stops changing.
+        //
+        while(true)
+        {
+            //
+            // fill XYC with center numbers
+            //
+            waschanges = false;
+            for(i = 0; i <= npoints-1; i++)
+            {
+                cclosest = -1;
+                dclosest = ap::maxrealnumber;
+                for(j = 0; j <= k-1; j++)
+                {
+                    ap::vmove(&tmp(0), &xy(i, 0), ap::vlen(0,nvars-1));
+                    ap::vsub(&tmp(0), &ct(j, 0), ap::vlen(0,nvars-1));
+                    v = ap::vdotproduct(&tmp(0), &tmp(0), ap::vlen(0,nvars-1));
+                    if( v<dclosest )
+                    {
+                        cclosest = j;
+                        dclosest = v;
+                    }
+                }
+                if( xyc(i)!=cclosest )
+                {
+                    waschanges = true;
+                }
+                xyc(i) = cclosest;
+            }
+            //
+            // Update centers: accumulate per-cluster sums and sizes
+            //
+            for(j = 0; j <= k-1; j++)
+            {
+                csizes(j) = 0;
+            }
+            for(i = 0; i <= k-1; i++)
+            {
+                for(j = 0; j <= nvars-1; j++)
+                {
+                    ct(i,j) = 0;
+                }
+            }
+            for(i = 0; i <= npoints-1; i++)
+            {
+                csizes(xyc(i)) = csizes(xyc(i))+1;
+                ap::vadd(&ct(xyc(i), 0), &xy(i, 0), ap::vlen(0,nvars-1));
+            }
+            //
+            // Turn sums into means for non-empty clusters and detect
+            // empty ones. Means are computed BEFORE possible re-seeding,
+            // so that SelectCenterPP measures distances to real centers
+            // rather than to raw coordinate sums.
+            //
+            zerosizeclusters = false;
+            for(j = 0; j <= k-1; j++)
+            {
+                cbusy(j) = csizes(j)!=0;
+                if( cbusy(j) )
+                {
+                    v = double(1)/double(csizes(j));
+                    ap::vmul(&ct(j, 0), ap::vlen(0,nvars-1), v);
+                }
+                else
+                {
+                    zerosizeclusters = true;
+                }
+            }
+            if( zerosizeclusters )
+            {
+                //
+                // Some clusters have zero size - rare, but possible.
+                // We'll choose new centers for such clusters using k-means++ rule
+                // and restart algorithm
+                //
+                if( !selectcenterpp(xy, npoints, nvars, ct, cbusy, k, d2, p, tmp) )
+                {
+                    info = -3;
+                    return;
+                }
+                continue;
+            }
+            //
+            // if nothing has changed during iteration
+            //
+            if( !waschanges )
+            {
+                break;
+            }
+        }
+        //
+        // 3. Calculate E (sum of squared distances to assigned centers),
+        //    compare with best centers found so far
+        //
+        e = 0;
+        for(i = 0; i <= npoints-1; i++)
+        {
+            ap::vmove(&tmp(0), &xy(i, 0), ap::vlen(0,nvars-1));
+            ap::vsub(&tmp(0), &ct(xyc(i), 0), ap::vlen(0,nvars-1));
+            v = ap::vdotproduct(&tmp(0), &tmp(0), ap::vlen(0,nvars-1));
+            e = e+v;
+        }
+        if( pass==1||e<ebest )
+        {
+            //
+            // store partition: remember the error, the centers and the
+            // labels together so that C and XYC describe the same restart.
+            // The first pass is always stored, which guarantees that
+            // CTBest/XYCBest are initialized.
+            //
+            ebest = e;
+            copymatrix(ct, 0, k-1, 0, nvars-1, ctbest, 0, k-1, 0, nvars-1);
+            for(i = 0; i <= npoints-1; i++)
+            {
+                xycbest(i) = xyc(i);
+            }
+        }
+    }
+    //
+    // Return labels of the best restart
+    //
+    for(i = 0; i <= npoints-1; i++)
+    {
+        xyc(i) = xycbest(i);
+    }
+    //
+    // Copy and transpose
+    //
+    c.setbounds(0, nvars-1, 0, k-1);
+    copyandtranspose(ctbest, 0, k-1, 0, nvars-1, c, 0, nvars-1, 0, k-1);
+}
+/*************************************************************************
+Select centers for new clusters using k-means++ rule
+For every slot CC in [0,CCnt-1] which is not marked busy, one row of XY is
+chosen at random with probability proportional to its squared distance to
+the nearest busy center, and copied to row CC of Centers.
+BusyCenters is passed by value: slots filled here are marked busy in the
+local copy only, the caller's array is left untouched.
+D2, P and Tmp are caller-provided work arrays (indexed 0..NPoints-1,
+0..NPoints-1 and 0..NVars-1 respectively).
+Returns False when the sum of squared distances is exactly zero, i.e.
+every point coincides with some busy center; True otherwise.
+*************************************************************************/
+static bool selectcenterpp(const ap::real_2d_array& xy,
+     int npoints,
+     int nvars,
+     ap::real_2d_array& centers,
+     ap::boolean_1d_array busycenters,
+     int ccnt,
+     ap::real_1d_array& d2,
+     ap::real_1d_array& p,
+     ap::real_1d_array& tmp)
+{
+    for(int slot = 0; slot < ccnt; slot++)
+    {
+        if( busycenters(slot) )
+        {
+            continue;
+        }
+        //
+        // D2[pt] = squared distance from point pt to the nearest busy
+        // center; the total is accumulated along the way
+        //
+        double total = 0;
+        for(int pt = 0; pt < npoints; pt++)
+        {
+            double nearest = ap::maxrealnumber;
+            for(int other = 0; other < ccnt; other++)
+            {
+                if( !busycenters(other) )
+                {
+                    continue;
+                }
+                ap::vmove(&tmp(0), &xy(pt, 0), ap::vlen(0,nvars-1));
+                ap::vsub(&tmp(0), &centers(other, 0), ap::vlen(0,nvars-1));
+                double dist = ap::vdotproduct(&tmp(0), &tmp(0), ap::vlen(0,nvars-1));
+                if( dist<nearest )
+                {
+                    nearest = dist;
+                }
+            }
+            d2(pt) = nearest;
+            total = total+nearest;
+        }
+        //
+        // degenerate case: nothing left to choose from
+        //
+        if( total==0 )
+        {
+            return false;
+        }
+        //
+        // P = D2/total (non-cumulative probabilities)
+        //
+        double scale = 1/total;
+        ap::vmove(&p(0), &d2(0), ap::vlen(0,npoints-1), scale);
+        //
+        // Draw a random number and walk the cumulative sum of P until
+        // it reaches the drawn value; the last point is the fallback
+        // when the threshold is never reached.
+        //
+        double threshold = ap::randomreal();
+        double cumulative = 0;
+        int chosen = npoints-1;
+        for(int pt = 0; pt < npoints-1; pt++)
+        {
+            cumulative = cumulative+p(pt);
+            if( threshold<=cumulative )
+            {
+                chosen = pt;
+                break;
+            }
+        }
+        ap::vmove(&centers(slot, 0), &xy(chosen, 0), ap::vlen(0,nvars-1));
+        busycenters(slot) = true;
+    }
+    return true;
+}

data/ext/alglib/kmeans.h ADDED

@@ -0,0 +1,76 @@
+/*************************************************************************
+Copyright (c) 2008, Sergey Bochkanov (ALGLIB project).
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+- Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer listed
+  in this license in the documentation and/or other materials
+  provided with the distribution.
+- Neither the name of the copyright holders nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*************************************************************************/
+#ifndef _kmeans_h
+#define _kmeans_h
+#include "ap.h"
+#include "ialglib.h"
+#include "blas.h"
+/*************************************************************************
+k-means++ clusterization
+INPUT PARAMETERS:
+    XY          -   dataset, array [0..NPoints-1,0..NVars-1].
+    NPoints     -   dataset size, NPoints>=K
+    NVars       -   number of variables, NVars>=1
+    K           -   desired number of clusters, K>=1
+    Restarts    -   number of restarts, Restarts>=1
+OUTPUT PARAMETERS:
+    Info        -   return code:
+                    * -3, if task is degenerate (number of distinct points is
+                          less than K)
+                    * -1, if incorrect NPoints/NVars/K/Restarts was passed
+                    *  1, if subroutine finished successfully
+    C           -   array[0..NVars-1,0..K-1], matrix whose columns store
+                    cluster's centers
+    XYC         -   array[0..NPoints-1], XYC[i] is the index of the cluster
+                    which the i-th dataset point belongs to
+  -- ALGLIB --
+     Copyright 21.03.2009 by Bochkanov Sergey
+*************************************************************************/
+void kmeansgenerate(const ap::real_2d_array& xy, int npoints, int nvars,
+     int k, int restarts,
+     int& info, ap::real_2d_array& c, ap::integer_1d_array& xyc);
+#endif