alglib 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255) hide show
  1. data/History.txt +7 -0
  2. data/Manifest.txt +253 -0
  3. data/README.txt +33 -0
  4. data/Rakefile +27 -0
  5. data/ext/Rakefile +24 -0
  6. data/ext/alglib.i +24 -0
  7. data/ext/alglib/Makefile +157 -0
  8. data/ext/alglib/airyf.cpp +372 -0
  9. data/ext/alglib/airyf.h +81 -0
  10. data/ext/alglib/alglib.cpp +8558 -0
  11. data/ext/alglib/alglib_util.cpp +19 -0
  12. data/ext/alglib/alglib_util.h +14 -0
  13. data/ext/alglib/ap.cpp +877 -0
  14. data/ext/alglib/ap.english.html +364 -0
  15. data/ext/alglib/ap.h +666 -0
  16. data/ext/alglib/ap.russian.html +442 -0
  17. data/ext/alglib/apvt.h +754 -0
  18. data/ext/alglib/bdss.cpp +1500 -0
  19. data/ext/alglib/bdss.h +251 -0
  20. data/ext/alglib/bdsvd.cpp +1339 -0
  21. data/ext/alglib/bdsvd.h +164 -0
  22. data/ext/alglib/bessel.cpp +1226 -0
  23. data/ext/alglib/bessel.h +331 -0
  24. data/ext/alglib/betaf.cpp +105 -0
  25. data/ext/alglib/betaf.h +74 -0
  26. data/ext/alglib/bidiagonal.cpp +1328 -0
  27. data/ext/alglib/bidiagonal.h +350 -0
  28. data/ext/alglib/binomialdistr.cpp +247 -0
  29. data/ext/alglib/binomialdistr.h +153 -0
  30. data/ext/alglib/blas.cpp +576 -0
  31. data/ext/alglib/blas.h +132 -0
  32. data/ext/alglib/cblas.cpp +226 -0
  33. data/ext/alglib/cblas.h +57 -0
  34. data/ext/alglib/cdet.cpp +138 -0
  35. data/ext/alglib/cdet.h +92 -0
  36. data/ext/alglib/chebyshev.cpp +216 -0
  37. data/ext/alglib/chebyshev.h +76 -0
  38. data/ext/alglib/chisquaredistr.cpp +157 -0
  39. data/ext/alglib/chisquaredistr.h +144 -0
  40. data/ext/alglib/cholesky.cpp +285 -0
  41. data/ext/alglib/cholesky.h +86 -0
  42. data/ext/alglib/cinverse.cpp +298 -0
  43. data/ext/alglib/cinverse.h +111 -0
  44. data/ext/alglib/clu.cpp +337 -0
  45. data/ext/alglib/clu.h +120 -0
  46. data/ext/alglib/correlation.cpp +280 -0
  47. data/ext/alglib/correlation.h +77 -0
  48. data/ext/alglib/correlationtests.cpp +726 -0
  49. data/ext/alglib/correlationtests.h +134 -0
  50. data/ext/alglib/crcond.cpp +826 -0
  51. data/ext/alglib/crcond.h +148 -0
  52. data/ext/alglib/creflections.cpp +310 -0
  53. data/ext/alglib/creflections.h +165 -0
  54. data/ext/alglib/csolve.cpp +312 -0
  55. data/ext/alglib/csolve.h +99 -0
  56. data/ext/alglib/ctrinverse.cpp +387 -0
  57. data/ext/alglib/ctrinverse.h +98 -0
  58. data/ext/alglib/ctrlinsolve.cpp +297 -0
  59. data/ext/alglib/ctrlinsolve.h +81 -0
  60. data/ext/alglib/dawson.cpp +234 -0
  61. data/ext/alglib/dawson.h +74 -0
  62. data/ext/alglib/descriptivestatistics.cpp +436 -0
  63. data/ext/alglib/descriptivestatistics.h +112 -0
  64. data/ext/alglib/det.cpp +140 -0
  65. data/ext/alglib/det.h +94 -0
  66. data/ext/alglib/dforest.cpp +1819 -0
  67. data/ext/alglib/dforest.h +316 -0
  68. data/ext/alglib/elliptic.cpp +497 -0
  69. data/ext/alglib/elliptic.h +217 -0
  70. data/ext/alglib/estnorm.cpp +429 -0
  71. data/ext/alglib/estnorm.h +107 -0
  72. data/ext/alglib/expintegrals.cpp +422 -0
  73. data/ext/alglib/expintegrals.h +108 -0
  74. data/ext/alglib/faq.english.html +258 -0
  75. data/ext/alglib/faq.russian.html +272 -0
  76. data/ext/alglib/fdistr.cpp +202 -0
  77. data/ext/alglib/fdistr.h +163 -0
  78. data/ext/alglib/fresnel.cpp +211 -0
  79. data/ext/alglib/fresnel.h +91 -0
  80. data/ext/alglib/gammaf.cpp +338 -0
  81. data/ext/alglib/gammaf.h +104 -0
  82. data/ext/alglib/gqgengauss.cpp +235 -0
  83. data/ext/alglib/gqgengauss.h +92 -0
  84. data/ext/alglib/gqgenhermite.cpp +268 -0
  85. data/ext/alglib/gqgenhermite.h +63 -0
  86. data/ext/alglib/gqgenjacobi.cpp +297 -0
  87. data/ext/alglib/gqgenjacobi.h +72 -0
  88. data/ext/alglib/gqgenlaguerre.cpp +265 -0
  89. data/ext/alglib/gqgenlaguerre.h +69 -0
  90. data/ext/alglib/gqgenlegendre.cpp +300 -0
  91. data/ext/alglib/gqgenlegendre.h +62 -0
  92. data/ext/alglib/gqgenlobatto.cpp +305 -0
  93. data/ext/alglib/gqgenlobatto.h +97 -0
  94. data/ext/alglib/gqgenradau.cpp +232 -0
  95. data/ext/alglib/gqgenradau.h +95 -0
  96. data/ext/alglib/hbisinv.cpp +480 -0
  97. data/ext/alglib/hbisinv.h +183 -0
  98. data/ext/alglib/hblas.cpp +228 -0
  99. data/ext/alglib/hblas.h +64 -0
  100. data/ext/alglib/hcholesky.cpp +339 -0
  101. data/ext/alglib/hcholesky.h +91 -0
  102. data/ext/alglib/hermite.cpp +114 -0
  103. data/ext/alglib/hermite.h +49 -0
  104. data/ext/alglib/hessenberg.cpp +370 -0
  105. data/ext/alglib/hessenberg.h +152 -0
  106. data/ext/alglib/hevd.cpp +247 -0
  107. data/ext/alglib/hevd.h +107 -0
  108. data/ext/alglib/hsschur.cpp +1316 -0
  109. data/ext/alglib/hsschur.h +108 -0
  110. data/ext/alglib/htridiagonal.cpp +734 -0
  111. data/ext/alglib/htridiagonal.h +180 -0
  112. data/ext/alglib/ialglib.cpp +6 -0
  113. data/ext/alglib/ialglib.h +9 -0
  114. data/ext/alglib/ibetaf.cpp +960 -0
  115. data/ext/alglib/ibetaf.h +125 -0
  116. data/ext/alglib/igammaf.cpp +430 -0
  117. data/ext/alglib/igammaf.h +157 -0
  118. data/ext/alglib/inv.cpp +274 -0
  119. data/ext/alglib/inv.h +115 -0
  120. data/ext/alglib/inverseupdate.cpp +480 -0
  121. data/ext/alglib/inverseupdate.h +185 -0
  122. data/ext/alglib/jacobianelliptic.cpp +164 -0
  123. data/ext/alglib/jacobianelliptic.h +94 -0
  124. data/ext/alglib/jarquebera.cpp +2271 -0
  125. data/ext/alglib/jarquebera.h +80 -0
  126. data/ext/alglib/kmeans.cpp +356 -0
  127. data/ext/alglib/kmeans.h +76 -0
  128. data/ext/alglib/laguerre.cpp +94 -0
  129. data/ext/alglib/laguerre.h +48 -0
  130. data/ext/alglib/lbfgs.cpp +1167 -0
  131. data/ext/alglib/lbfgs.h +218 -0
  132. data/ext/alglib/lda.cpp +434 -0
  133. data/ext/alglib/lda.h +133 -0
  134. data/ext/alglib/ldlt.cpp +1130 -0
  135. data/ext/alglib/ldlt.h +124 -0
  136. data/ext/alglib/leastsquares.cpp +1252 -0
  137. data/ext/alglib/leastsquares.h +290 -0
  138. data/ext/alglib/legendre.cpp +107 -0
  139. data/ext/alglib/legendre.h +49 -0
  140. data/ext/alglib/linreg.cpp +1185 -0
  141. data/ext/alglib/linreg.h +380 -0
  142. data/ext/alglib/logit.cpp +1523 -0
  143. data/ext/alglib/logit.h +333 -0
  144. data/ext/alglib/lq.cpp +399 -0
  145. data/ext/alglib/lq.h +160 -0
  146. data/ext/alglib/lu.cpp +462 -0
  147. data/ext/alglib/lu.h +119 -0
  148. data/ext/alglib/mannwhitneyu.cpp +4490 -0
  149. data/ext/alglib/mannwhitneyu.h +115 -0
  150. data/ext/alglib/minlm.cpp +918 -0
  151. data/ext/alglib/minlm.h +312 -0
  152. data/ext/alglib/mlpbase.cpp +3375 -0
  153. data/ext/alglib/mlpbase.h +589 -0
  154. data/ext/alglib/mlpe.cpp +1369 -0
  155. data/ext/alglib/mlpe.h +552 -0
  156. data/ext/alglib/mlptrain.cpp +1056 -0
  157. data/ext/alglib/mlptrain.h +283 -0
  158. data/ext/alglib/nearunityunit.cpp +91 -0
  159. data/ext/alglib/nearunityunit.h +17 -0
  160. data/ext/alglib/normaldistr.cpp +377 -0
  161. data/ext/alglib/normaldistr.h +175 -0
  162. data/ext/alglib/nsevd.cpp +1869 -0
  163. data/ext/alglib/nsevd.h +140 -0
  164. data/ext/alglib/pca.cpp +168 -0
  165. data/ext/alglib/pca.h +87 -0
  166. data/ext/alglib/poissondistr.cpp +143 -0
  167. data/ext/alglib/poissondistr.h +130 -0
  168. data/ext/alglib/polinterpolation.cpp +685 -0
  169. data/ext/alglib/polinterpolation.h +206 -0
  170. data/ext/alglib/psif.cpp +173 -0
  171. data/ext/alglib/psif.h +88 -0
  172. data/ext/alglib/qr.cpp +414 -0
  173. data/ext/alglib/qr.h +168 -0
  174. data/ext/alglib/ratinterpolation.cpp +134 -0
  175. data/ext/alglib/ratinterpolation.h +72 -0
  176. data/ext/alglib/rcond.cpp +705 -0
  177. data/ext/alglib/rcond.h +140 -0
  178. data/ext/alglib/reflections.cpp +504 -0
  179. data/ext/alglib/reflections.h +165 -0
  180. data/ext/alglib/rotations.cpp +473 -0
  181. data/ext/alglib/rotations.h +128 -0
  182. data/ext/alglib/rsolve.cpp +221 -0
  183. data/ext/alglib/rsolve.h +99 -0
  184. data/ext/alglib/sbisinv.cpp +217 -0
  185. data/ext/alglib/sbisinv.h +171 -0
  186. data/ext/alglib/sblas.cpp +185 -0
  187. data/ext/alglib/sblas.h +64 -0
  188. data/ext/alglib/schur.cpp +156 -0
  189. data/ext/alglib/schur.h +102 -0
  190. data/ext/alglib/sdet.cpp +193 -0
  191. data/ext/alglib/sdet.h +101 -0
  192. data/ext/alglib/sevd.cpp +116 -0
  193. data/ext/alglib/sevd.h +99 -0
  194. data/ext/alglib/sinverse.cpp +672 -0
  195. data/ext/alglib/sinverse.h +138 -0
  196. data/ext/alglib/spddet.cpp +138 -0
  197. data/ext/alglib/spddet.h +96 -0
  198. data/ext/alglib/spdgevd.cpp +842 -0
  199. data/ext/alglib/spdgevd.h +200 -0
  200. data/ext/alglib/spdinverse.cpp +509 -0
  201. data/ext/alglib/spdinverse.h +122 -0
  202. data/ext/alglib/spdrcond.cpp +421 -0
  203. data/ext/alglib/spdrcond.h +118 -0
  204. data/ext/alglib/spdsolve.cpp +275 -0
  205. data/ext/alglib/spdsolve.h +105 -0
  206. data/ext/alglib/spline2d.cpp +1192 -0
  207. data/ext/alglib/spline2d.h +301 -0
  208. data/ext/alglib/spline3.cpp +1264 -0
  209. data/ext/alglib/spline3.h +290 -0
  210. data/ext/alglib/srcond.cpp +595 -0
  211. data/ext/alglib/srcond.h +127 -0
  212. data/ext/alglib/ssolve.cpp +895 -0
  213. data/ext/alglib/ssolve.h +139 -0
  214. data/ext/alglib/stdafx.h +0 -0
  215. data/ext/alglib/stest.cpp +131 -0
  216. data/ext/alglib/stest.h +94 -0
  217. data/ext/alglib/studenttdistr.cpp +222 -0
  218. data/ext/alglib/studenttdistr.h +115 -0
  219. data/ext/alglib/studentttests.cpp +377 -0
  220. data/ext/alglib/studentttests.h +178 -0
  221. data/ext/alglib/svd.cpp +620 -0
  222. data/ext/alglib/svd.h +126 -0
  223. data/ext/alglib/tdbisinv.cpp +2608 -0
  224. data/ext/alglib/tdbisinv.h +228 -0
  225. data/ext/alglib/tdevd.cpp +1229 -0
  226. data/ext/alglib/tdevd.h +115 -0
  227. data/ext/alglib/tridiagonal.cpp +594 -0
  228. data/ext/alglib/tridiagonal.h +171 -0
  229. data/ext/alglib/trigintegrals.cpp +490 -0
  230. data/ext/alglib/trigintegrals.h +131 -0
  231. data/ext/alglib/trinverse.cpp +345 -0
  232. data/ext/alglib/trinverse.h +98 -0
  233. data/ext/alglib/trlinsolve.cpp +926 -0
  234. data/ext/alglib/trlinsolve.h +73 -0
  235. data/ext/alglib/tsort.cpp +405 -0
  236. data/ext/alglib/tsort.h +54 -0
  237. data/ext/alglib/variancetests.cpp +245 -0
  238. data/ext/alglib/variancetests.h +134 -0
  239. data/ext/alglib/wsr.cpp +6285 -0
  240. data/ext/alglib/wsr.h +96 -0
  241. data/ext/ap.i +97 -0
  242. data/ext/correlation.i +24 -0
  243. data/ext/extconf.rb +6 -0
  244. data/ext/logit.i +89 -0
  245. data/lib/alglib.rb +71 -0
  246. data/lib/alglib/correlation.rb +26 -0
  247. data/lib/alglib/linearregression.rb +63 -0
  248. data/lib/alglib/logit.rb +42 -0
  249. data/test/test_alglib.rb +52 -0
  250. data/test/test_correlation.rb +44 -0
  251. data/test/test_correlationtest.rb +45 -0
  252. data/test/test_linreg.rb +35 -0
  253. data/test/test_logit.rb +43 -0
  254. data/test/test_pca.rb +27 -0
  255. metadata +326 -0
@@ -0,0 +1,80 @@
1
+ /*************************************************************************
2
+ Copyright (c) 2007, Sergey Bochkanov (ALGLIB project).
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are
6
+ met:
7
+
8
+ - Redistributions of source code must retain the above copyright
9
+ notice, this list of conditions and the following disclaimer.
10
+
11
+ - Redistributions in binary form must reproduce the above copyright
12
+ notice, this list of conditions and the following disclaimer listed
13
+ in this license in the documentation and/or other materials
14
+ provided with the distribution.
15
+
16
+ - Neither the name of the copyright holders nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+ *************************************************************************/
32
+
33
+ #ifndef _jarquebera_h
34
+ #define _jarquebera_h
35
+
36
+ #include "ap.h"
37
+ #include "ialglib.h"
38
+
39
+ /*************************************************************************
40
+ Jarque-Bera test
41
+
42
+ This test checks hypotheses about the fact that a given sample X is a
43
+ sample of normal random variable.
44
+
45
+ Requirements:
46
+ * the number of elements in the sample is not less than 5.
47
+
48
+ Input parameters:
49
+ X - sample. Array whose index goes from 0 to N-1.
50
+ N - size of the sample. N>=5
51
+
52
+ Output parameters:
53
+ BothTails - p-value for two-tailed test.
54
+ If BothTails is less than the given significance level
55
+ the null hypothesis is rejected.
56
+ LeftTail - p-value for left-tailed test.
57
+ If LeftTail is less than the given significance level,
58
+ the null hypothesis is rejected.
59
+ RightTail - p-value for right-tailed test.
60
+ If RightTail is less than the given significance level
61
+ the null hypothesis is rejected.
62
+
63
+ Accuracy of the approximation used (5<=N<=1951):
64
+
65
+ p-value relative error (5<=N<=1951)
66
+ [1, 0.1] < 1%
67
+ [0.1, 0.01] < 2%
68
+ [0.01, 0.001] < 6%
69
+ [0.001, 0] wasn't measured
70
+
71
+ For N>1951 accuracy wasn't measured but it shouldn't be sharply different
72
+ from table values.
73
+
74
+ -- ALGLIB --
75
+ Copyright 09.04.2007 by Bochkanov Sergey
76
+ *************************************************************************/
77
+ void jarqueberatest(const ap::real_1d_array& x, int n, double& p);
78
+
79
+
80
+ #endif
@@ -0,0 +1,356 @@
1
+ /*************************************************************************
2
+ Copyright (c) 2008, Sergey Bochkanov (ALGLIB project).
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are
6
+ met:
7
+
8
+ - Redistributions of source code must retain the above copyright
9
+ notice, this list of conditions and the following disclaimer.
10
+
11
+ - Redistributions in binary form must reproduce the above copyright
12
+ notice, this list of conditions and the following disclaimer listed
13
+ in this license in the documentation and/or other materials
14
+ provided with the distribution.
15
+
16
+ - Neither the name of the copyright holders nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+ *************************************************************************/
32
+
33
+ #include <stdafx.h>
34
+ #include "kmeans.h"
35
+
36
+ static bool selectcenterpp(const ap::real_2d_array& xy,
37
+ int npoints,
38
+ int nvars,
39
+ ap::real_2d_array& centers,
40
+ ap::boolean_1d_array busycenters,
41
+ int ccnt,
42
+ ap::real_1d_array& d2,
43
+ ap::real_1d_array& p,
44
+ ap::real_1d_array& tmp);
45
+
46
+ /*************************************************************************
47
+ k-means++ clusterization
48
+
49
+ INPUT PARAMETERS:
50
+ XY - dataset, array [0..NPoints-1,0..NVars-1].
51
+ NPoints - dataset size, NPoints>=K
52
+ NVars - number of variables, NVars>=1
53
+ K - desired number of clusters, K>=1
54
+ Restarts - number of restarts, Restarts>=1
55
+
56
+ OUTPUT PARAMETERS:
57
+ Info - return code:
58
+ * -3, if taskis degenerate (number of distinct points is
59
+ less than K)
60
+ * -1, if incorrect NPoints/NFeatures/K/Restarts was passed
61
+ * 1, if subroutine finished successfully
62
+ C - array[0..NVars-1,0..K-1].matrix whose columns store
63
+ cluster's centers
64
+ XYC - array which contains number of clusters dataset points
65
+ belong to.
66
+
67
+ -- ALGLIB --
68
+ Copyright 21.03.2009 by Bochkanov Sergey
69
+ *************************************************************************/
70
+ void kmeansgenerate(const ap::real_2d_array& xy,
71
+ int npoints,
72
+ int nvars,
73
+ int k,
74
+ int restarts,
75
+ int& info,
76
+ ap::real_2d_array& c,
77
+ ap::integer_1d_array& xyc)
78
+ {
79
+ int i;
80
+ int j;
81
+ ap::real_2d_array ct;
82
+ ap::real_2d_array ctbest;
83
+ double e;
84
+ double ebest;
85
+ ap::real_1d_array x;
86
+ ap::real_1d_array tmp;
87
+ int cc;
88
+ ap::real_1d_array d2;
89
+ ap::real_1d_array p;
90
+ ap::integer_1d_array csizes;
91
+ ap::boolean_1d_array cbusy;
92
+ double v;
93
+ double s;
94
+ int cclosest;
95
+ double dclosest;
96
+ ap::real_1d_array work;
97
+ bool waschanges;
98
+ bool zerosizeclusters;
99
+ int pass;
100
+
101
+
102
+ //
103
+ // Test parameters
104
+ //
105
+ if( npoints<k||nvars<1||k<1||restarts<1 )
106
+ {
107
+ info = -1;
108
+ return;
109
+ }
110
+
111
+ //
112
+ // TODO: special case K=1
113
+ // TODO: special case K=NPoints
114
+ //
115
+ info = 1;
116
+
117
+ //
118
+ // Multiple passes of k-means++ algorithm
119
+ //
120
+ ct.setbounds(0, k-1, 0, nvars-1);
121
+ ctbest.setbounds(0, k-1, 0, nvars-1);
122
+ xyc.setbounds(0, npoints-1);
123
+ d2.setbounds(0, npoints-1);
124
+ p.setbounds(0, npoints-1);
125
+ tmp.setbounds(0, nvars-1);
126
+ csizes.setbounds(0, k-1);
127
+ cbusy.setbounds(0, k-1);
128
+ ebest = ap::maxrealnumber;
129
+ for(pass = 1; pass <= restarts; pass++)
130
+ {
131
+
132
+ //
133
+ // Select initial centers using k-means++ algorithm
134
+ // 1. Choose first center at random
135
+ // 2. Choose next centers using their distance from centers already chosen
136
+ //
137
+ // Note that for performance reasons centers are stored in ROWS of CT, not
138
+ // in columns. We'll transpose CT in the end and store it in the C.
139
+ //
140
+ i = ap::randominteger(npoints);
141
+ ap::vmove(&ct(0, 0), &xy(i, 0), ap::vlen(0,nvars-1));
142
+ cbusy(0) = true;
143
+ for(i = 1; i <= k-1; i++)
144
+ {
145
+ cbusy(i) = false;
146
+ }
147
+ if( !selectcenterpp(xy, npoints, nvars, ct, cbusy, k, d2, p, tmp) )
148
+ {
149
+ info = -3;
150
+ return;
151
+ }
152
+
153
+ //
154
+ // Update centers:
155
+ // 2. update center positions
156
+ //
157
+ while(true)
158
+ {
159
+
160
+ //
161
+ // fill XYC with center numbers
162
+ //
163
+ waschanges = false;
164
+ for(i = 0; i <= npoints-1; i++)
165
+ {
166
+ cclosest = -1;
167
+ dclosest = ap::maxrealnumber;
168
+ for(j = 0; j <= k-1; j++)
169
+ {
170
+ ap::vmove(&tmp(0), &xy(i, 0), ap::vlen(0,nvars-1));
171
+ ap::vsub(&tmp(0), &ct(j, 0), ap::vlen(0,nvars-1));
172
+ v = ap::vdotproduct(&tmp(0), &tmp(0), ap::vlen(0,nvars-1));
173
+ if( v<dclosest )
174
+ {
175
+ cclosest = j;
176
+ dclosest = v;
177
+ }
178
+ }
179
+ if( xyc(i)!=cclosest )
180
+ {
181
+ waschanges = true;
182
+ }
183
+ xyc(i) = cclosest;
184
+ }
185
+
186
+ //
187
+ // Update centers
188
+ //
189
+ for(j = 0; j <= k-1; j++)
190
+ {
191
+ csizes(j) = 0;
192
+ }
193
+ for(i = 0; i <= k-1; i++)
194
+ {
195
+ for(j = 0; j <= nvars-1; j++)
196
+ {
197
+ ct(i,j) = 0;
198
+ }
199
+ }
200
+ for(i = 0; i <= npoints-1; i++)
201
+ {
202
+ csizes(xyc(i)) = csizes(xyc(i))+1;
203
+ ap::vadd(&ct(xyc(i), 0), &xy(i, 0), ap::vlen(0,nvars-1));
204
+ }
205
+ zerosizeclusters = false;
206
+ for(i = 0; i <= k-1; i++)
207
+ {
208
+ cbusy(i) = csizes(i)!=0;
209
+ zerosizeclusters = zerosizeclusters||csizes(i)==0;
210
+ }
211
+ if( zerosizeclusters )
212
+ {
213
+
214
+ //
215
+ // Some clusters have zero size - rare, but possible.
216
+ // We'll choose new centers for such clusters using k-means++ rule
217
+ // and restart algorithm
218
+ //
219
+ if( !selectcenterpp(xy, npoints, nvars, ct, cbusy, k, d2, p, tmp) )
220
+ {
221
+ info = -3;
222
+ return;
223
+ }
224
+ continue;
225
+ }
226
+ for(j = 0; j <= k-1; j++)
227
+ {
228
+ v = double(1)/double(csizes(j));
229
+ ap::vmul(&ct(j, 0), ap::vlen(0,nvars-1), v);
230
+ }
231
+
232
+ //
233
+ // if nothing has changed during iteration
234
+ //
235
+ if( !waschanges )
236
+ {
237
+ break;
238
+ }
239
+ }
240
+
241
+ //
242
+ // 3. Calculate E, compare with best centers found so far
243
+ //
244
+ e = 0;
245
+ for(i = 0; i <= npoints-1; i++)
246
+ {
247
+ ap::vmove(&tmp(0), &xy(i, 0), ap::vlen(0,nvars-1));
248
+ ap::vsub(&tmp(0), &ct(xyc(i), 0), ap::vlen(0,nvars-1));
249
+ v = ap::vdotproduct(&tmp(0), &tmp(0), ap::vlen(0,nvars-1));
250
+ e = e+v;
251
+ }
252
+ if( e<ebest )
253
+ {
254
+
255
+ //
256
+ // store partition
257
+ //
258
+ copymatrix(ct, 0, k-1, 0, nvars-1, ctbest, 0, k-1, 0, nvars-1);
259
+ }
260
+ }
261
+
262
+ //
263
+ // Copy and transpose
264
+ //
265
+ c.setbounds(0, nvars-1, 0, k-1);
266
+ copyandtranspose(ctbest, 0, k-1, 0, nvars-1, c, 0, nvars-1, 0, k-1);
267
+ }
268
+
269
+
270
+ /*************************************************************************
271
+ Select center for a new cluster using k-means++ rule
272
+ *************************************************************************/
273
+ static bool selectcenterpp(const ap::real_2d_array& xy,
274
+ int npoints,
275
+ int nvars,
276
+ ap::real_2d_array& centers,
277
+ ap::boolean_1d_array busycenters,
278
+ int ccnt,
279
+ ap::real_1d_array& d2,
280
+ ap::real_1d_array& p,
281
+ ap::real_1d_array& tmp)
282
+ {
283
+ bool result;
284
+ int i;
285
+ int j;
286
+ int cc;
287
+ double v;
288
+ double s;
289
+
290
+ result = true;
291
+ for(cc = 0; cc <= ccnt-1; cc++)
292
+ {
293
+ if( !busycenters(cc) )
294
+ {
295
+
296
+ //
297
+ // fill D2
298
+ //
299
+ for(i = 0; i <= npoints-1; i++)
300
+ {
301
+ d2(i) = ap::maxrealnumber;
302
+ for(j = 0; j <= ccnt-1; j++)
303
+ {
304
+ if( busycenters(j) )
305
+ {
306
+ ap::vmove(&tmp(0), &xy(i, 0), ap::vlen(0,nvars-1));
307
+ ap::vsub(&tmp(0), &centers(j, 0), ap::vlen(0,nvars-1));
308
+ v = ap::vdotproduct(&tmp(0), &tmp(0), ap::vlen(0,nvars-1));
309
+ if( v<d2(i) )
310
+ {
311
+ d2(i) = v;
312
+ }
313
+ }
314
+ }
315
+ }
316
+
317
+ //
318
+ // calculate P (non-cumulative)
319
+ //
320
+ s = 0;
321
+ for(i = 0; i <= npoints-1; i++)
322
+ {
323
+ s = s+d2(i);
324
+ }
325
+ if( s==0 )
326
+ {
327
+ result = false;
328
+ return result;
329
+ }
330
+ s = 1/s;
331
+ ap::vmove(&p(0), &d2(0), ap::vlen(0,npoints-1), s);
332
+
333
+ //
334
+ // choose one of points with probability P
335
+ // random number within (0,1) is generated and
336
+ // inverse empirical CDF is used to randomly choose a point.
337
+ //
338
+ s = 0;
339
+ v = ap::randomreal();
340
+ for(i = 0; i <= npoints-1; i++)
341
+ {
342
+ s = s+p(i);
343
+ if( v<=s||i==npoints-1 )
344
+ {
345
+ ap::vmove(&centers(cc, 0), &xy(i, 0), ap::vlen(0,nvars-1));
346
+ busycenters(cc) = true;
347
+ break;
348
+ }
349
+ }
350
+ }
351
+ }
352
+ return result;
353
+ }
354
+
355
+
356
+
@@ -0,0 +1,76 @@
1
+ /*************************************************************************
2
+ Copyright (c) 2008, Sergey Bochkanov (ALGLIB project).
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are
6
+ met:
7
+
8
+ - Redistributions of source code must retain the above copyright
9
+ notice, this list of conditions and the following disclaimer.
10
+
11
+ - Redistributions in binary form must reproduce the above copyright
12
+ notice, this list of conditions and the following disclaimer listed
13
+ in this license in the documentation and/or other materials
14
+ provided with the distribution.
15
+
16
+ - Neither the name of the copyright holders nor the names of its
17
+ contributors may be used to endorse or promote products derived from
18
+ this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
+ *************************************************************************/
32
+
33
+ #ifndef _kmeans_h
34
+ #define _kmeans_h
35
+
36
+ #include "ap.h"
37
+ #include "ialglib.h"
38
+
39
+ #include "blas.h"
40
+
41
+
42
+ /*************************************************************************
43
+ k-means++ clusterization
44
+
45
+ INPUT PARAMETERS:
46
+ XY - dataset, array [0..NPoints-1,0..NVars-1].
47
+ NPoints - dataset size, NPoints>=K
48
+ NVars - number of variables, NVars>=1
49
+ K - desired number of clusters, K>=1
50
+ Restarts - number of restarts, Restarts>=1
51
+
52
+ OUTPUT PARAMETERS:
53
+ Info - return code:
54
+ * -3, if taskis degenerate (number of distinct points is
55
+ less than K)
56
+ * -1, if incorrect NPoints/NFeatures/K/Restarts was passed
57
+ * 1, if subroutine finished successfully
58
+ C - array[0..NVars-1,0..K-1].matrix whose columns store
59
+ cluster's centers
60
+ XYC - array which contains number of clusters dataset points
61
+ belong to.
62
+
63
+ -- ALGLIB --
64
+ Copyright 21.03.2009 by Bochkanov Sergey
65
+ *************************************************************************/
66
+ void kmeansgenerate(const ap::real_2d_array& xy,
67
+ int npoints,
68
+ int nvars,
69
+ int k,
70
+ int restarts,
71
+ int& info,
72
+ ap::real_2d_array& c,
73
+ ap::integer_1d_array& xyc);
74
+
75
+
76
+ #endif