svf-tools 1.0.285 → 1.0.286

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/LICENSE.TXT +6 -4
  2. package/SVF-doxygen/html/html/dir_63dba4c559aa5986900c35e27974bafc.html +83 -0
  3. package/SVF-doxygen/html/html/dir_97aefd0d527b934f1d99a682da8fe6a9.html +2 -0
  4. package/SVF-doxygen/html/html/dir_d1b325ac671111a54e189ae033ba710d.html +87 -0
  5. package/SVF-doxygen/html/html/dir_d44c64559bbebec7f509842c48db8b23.html +2 -0
  6. package/SVF-doxygen/html/html/fastcluster_8cpp.html +263 -0
  7. package/SVF-doxygen/html/html/fastcluster_8cpp_source.html +88 -0
  8. package/SVF-doxygen/html/html/fastcluster_8h.html +278 -0
  9. package/SVF-doxygen/html/html/fastcluster_8h_source.html +86 -0
  10. package/SVF-doxygen/html/html/fastcluster__R__dm_8cpp_8inc.html +79 -0
  11. package/SVF-doxygen/html/html/fastcluster__R__dm_8cpp_8inc_source.html +77 -0
  12. package/SVF-doxygen/html/html/fastcluster__dm_8cpp_8inc.html +89 -0
  13. package/SVF-doxygen/html/html/fastcluster__dm_8cpp_8inc_source.html +80 -0
  14. package/SVF-doxygen/html/html/files.html +220 -214
  15. package/SVF-doxygen/html/html/globals_c.html +82 -74
  16. package/SVF-doxygen/html/html/globals_enum.html +3 -0
  17. package/SVF-doxygen/html/html/globals_eval.html +19 -0
  18. package/SVF-doxygen/html/html/globals_f.html +6 -3
  19. package/SVF-doxygen/html/html/globals_func_c.html +46 -38
  20. package/SVF-doxygen/html/html/globals_func_f.html +3 -0
  21. package/SVF-doxygen/html/html/globals_func_h.html +4 -0
  22. package/SVF-doxygen/html/html/globals_h.html +22 -0
  23. package/SVF-doxygen/html/html/globals_r.html +3 -3
  24. package/SVF-doxygen/html/html/globals_s.html +7 -9
  25. package/SVF-doxygen/html/html/menudata.js +1 -0
  26. package/SVF-doxygen/html/html/search/all_12.js +1 -1
  27. package/SVF-doxygen/html/html/search/all_13.js +4 -4
  28. package/SVF-doxygen/html/html/search/all_3.js +2 -0
  29. package/SVF-doxygen/html/html/search/all_6.js +5 -0
  30. package/SVF-doxygen/html/html/search/all_8.js +7 -0
  31. package/SVF-doxygen/html/html/search/enums_4.js +1 -2
  32. package/SVF-doxygen/html/html/search/enums_5.js +2 -2
  33. package/SVF-doxygen/html/html/search/enums_6.js +2 -3
  34. package/SVF-doxygen/html/html/search/enums_7.js +3 -6
  35. package/SVF-doxygen/html/html/search/enums_8.js +6 -1
  36. package/SVF-doxygen/html/html/search/enums_9.js +1 -5
  37. package/SVF-doxygen/html/html/search/enums_a.js +5 -1
  38. package/SVF-doxygen/html/html/search/enums_b.js +1 -4
  39. package/SVF-doxygen/html/html/search/enums_c.html +26 -0
  40. package/SVF-doxygen/html/html/search/enums_c.js +7 -0
  41. package/SVF-doxygen/html/html/search/enumvalues_7.js +5 -0
  42. package/SVF-doxygen/html/html/search/files_5.js +4 -0
  43. package/SVF-doxygen/html/html/search/functions_2.js +2 -0
  44. package/SVF-doxygen/html/html/search/functions_5.js +1 -0
  45. package/SVF-doxygen/html/html/search/functions_7.js +1 -0
  46. package/SVF-doxygen/html/html/search/searchdata.js +1 -1
  47. package/include/FastCluster/LICENSE.TXT +13 -0
  48. package/include/FastCluster/fastcluster.h +79 -0
  49. package/lib/CMakeLists.txt +4 -1
  50. package/lib/FastCluster/LICENSE.TXT +13 -0
  51. package/lib/FastCluster/fastcluster.cpp +170 -0
  52. package/lib/FastCluster/fastcluster_R_dm.cpp.inc +115 -0
  53. package/lib/FastCluster/fastcluster_dm.cpp.inc +1795 -0
  54. package/package.json +1 -1
@@ -0,0 +1,1795 @@
1
+ /*
2
+ fastcluster: Fast hierarchical clustering routines for R and Python
3
+
4
+ Copyright © 2011 Daniel Müllner
5
+ <http://danifold.net>
6
+
7
+ This library implements various fast algorithms for hierarchical,
8
+ agglomerative clustering methods:
9
+
10
+ (1) Algorithms for the "stored matrix approach": the input is the array of
11
+ pairwise dissimilarities.
12
+
13
+ MST_linkage_core: single linkage clustering with the "minimum spanning
14
+ tree algorithm (Rohlfs)
15
+
16
+ NN_chain_core: nearest-neighbor-chain algorithm, suitable for single,
17
+ complete, average, weighted and Ward linkage (Murtagh)
18
+
19
+ generic_linkage: generic algorithm, suitable for all distance update
20
+ formulas (Müllner)
21
+
22
+ (2) Algorithms for the "stored data approach": the input are points in a
23
+ vector space.
24
+
25
+ MST_linkage_core_vector: single linkage clustering for vector data
26
+
27
+ generic_linkage_vector: generic algorithm for vector data, suitable for
28
+ the Ward, centroid and median methods.
29
+
30
+ generic_linkage_vector_alternative: alternative scheme for updating the
31
+ nearest neighbors. This method seems faster than "generic_linkage_vector"
32
+ for the centroid and median methods but slower for the Ward method.
33
+
34
+ All these implementation treat infinity values correctly. They throw an
35
+ exception if a NaN distance value occurs.
36
+ */
37
+
38
+ // Older versions of Microsoft Visual Studio do not have the fenv header.
39
+ #ifdef _MSC_VER
40
+ #if (_MSC_VER == 1500 || _MSC_VER == 1600)
41
+ #define NO_INCLUDE_FENV
42
+ #endif
43
+ #endif
44
+ // NaN detection via fenv might not work on systems with software
45
+ // floating-point emulation (bug report for Debian armel).
46
+ #ifdef __SOFTFP__
47
+ #define NO_INCLUDE_FENV
48
+ #endif
49
+ #ifdef NO_INCLUDE_FENV
50
+ // #pragma message("Do not use fenv header.")
51
+ #else
52
+ // #pragma message("Use fenv header. If there is a warning about unknown #pragma STDC FENV_ACCESS, this can be ignored.")
53
+ // TODO
54
+ //#pragma STDC FENV_ACCESS ON
55
+ #include <fenv.h>
56
+ #endif
57
+
58
+ #include <cmath> // for std::pow, std::sqrt
59
+ #include <cstddef> // for std::ptrdiff_t
60
+ #include <limits> // for std::numeric_limits<...>::infinity()
61
+ #include <algorithm> // for std::fill_n
62
+ #include <stdexcept> // for std::runtime_error
63
+ #include <string> // for std::string
64
+
65
+ #include <cfloat> // also for DBL_MAX, DBL_MIN
66
+ #ifndef DBL_MANT_DIG
67
+ #error The constant DBL_MANT_DIG could not be defined.
68
+ #endif
69
+ #define T_FLOAT_MANT_DIG DBL_MANT_DIG
70
+
71
+ #ifndef LONG_MAX
72
+ #include <climits>
73
+ #endif
74
+ #ifndef LONG_MAX
75
+ #error The constant LONG_MAX could not be defined.
76
+ #endif
77
+ #ifndef INT_MAX
78
+ #error The constant INT_MAX could not be defined.
79
+ #endif
80
+
81
+ #ifndef INT32_MAX
82
+ #ifdef _MSC_VER
83
+ #if _MSC_VER >= 1600
84
+ #define __STDC_LIMIT_MACROS
85
+ #include <stdint.h>
86
+ #else
87
+ typedef __int32 int_fast32_t;
88
+ typedef __int64 int64_t;
89
+ #endif
90
+ #else
91
+ #define __STDC_LIMIT_MACROS
92
+ #include <stdint.h>
93
+ #endif
94
+ #endif
95
+
96
+ #define FILL_N std::fill_n
97
+ #ifdef _MSC_VER
98
+ #if _MSC_VER < 1600
99
+ #undef FILL_N
100
+ #define FILL_N stdext::unchecked_fill_n
101
+ #endif
102
+ #endif
103
+
104
+ // Suppress warnings about (potentially) uninitialized variables.
105
+ #ifdef _MSC_VER
106
+ #pragma warning (disable:4700)
107
+ #endif
108
+
109
+ #ifndef HAVE_DIAGNOSTIC
110
+ #if __GNUC__ > 4 || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 6))
111
+ #define HAVE_DIAGNOSTIC 1
112
+ #endif
113
+ #endif
114
+
115
+ #ifndef HAVE_VISIBILITY
116
+ #if __GNUC__ >= 4
117
+ #define HAVE_VISIBILITY 1
118
+ #endif
119
+ #endif
120
+
121
+ /* Since the public interface is given by the Python respectively R interface,
122
+ * we do not want other symbols than the interface initalization routines to be
123
+ * visible in the shared object file. The "visibility" switch is a GCC concept.
124
+ * Hiding symbols keeps the relocation table small and decreases startup time.
125
+ * See http://gcc.gnu.org/wiki/Visibility
126
+ */
127
+ #if HAVE_VISIBILITY
128
+ #pragma GCC visibility push(hidden)
129
+ #endif
130
+
131
+ typedef int_fast32_t t_index;
132
+ #ifndef INT32_MAX
133
+ #define MAX_INDEX 0x7fffffffL
134
+ #else
135
+ #define MAX_INDEX INT32_MAX
136
+ #endif
137
+ #if (LONG_MAX < MAX_INDEX)
138
+ #error The integer format "t_index" must not have a greater range than "long int".
139
+ #endif
140
+ #if (INT_MAX > MAX_INDEX)
141
+ #error The integer format "int" must not have a greater range than "t_index".
142
+ #endif
143
+ typedef double t_float;
144
+
145
+ /* Method codes.
146
+
147
+ These codes must agree with the METHODS array in fastcluster.R and the
148
+ dictionary mthidx in fastcluster.py.
149
+ */
150
+ enum method_codes {
151
+ // non-Euclidean methods
152
+ METHOD_METR_SINGLE = 0,
153
+ METHOD_METR_COMPLETE = 1,
154
+ METHOD_METR_AVERAGE = 2,
155
+ METHOD_METR_WEIGHTED = 3,
156
+ METHOD_METR_WARD = 4,
157
+ METHOD_METR_WARD_D = METHOD_METR_WARD,
158
+ METHOD_METR_CENTROID = 5,
159
+ METHOD_METR_MEDIAN = 6,
160
+ METHOD_METR_WARD_D2 = 7,
161
+
162
+ MIN_METHOD_CODE = 0,
163
+ MAX_METHOD_CODE = 7
164
+ };
165
+
166
+ enum method_codes_vector {
167
+ // Euclidean methods
168
+ METHOD_VECTOR_SINGLE = 0,
169
+ METHOD_VECTOR_WARD = 1,
170
+ METHOD_VECTOR_CENTROID = 2,
171
+ METHOD_VECTOR_MEDIAN = 3,
172
+
173
+ MIN_METHOD_VECTOR_CODE = 0,
174
+ MAX_METHOD_VECTOR_CODE = 3
175
+ };
176
+
177
+ // self-destructing array pointer
178
+ template <typename type>
179
+ class auto_array_ptr{
180
+ private:
181
+ type * ptr;
182
+ auto_array_ptr(auto_array_ptr const &); // non construction-copyable
183
+ auto_array_ptr& operator=(auto_array_ptr const &); // non copyable
184
+ public:
185
+ auto_array_ptr()
186
+ : ptr(NULL)
187
+ { }
188
+ template <typename index>
189
+ auto_array_ptr(index const size)
190
+ : ptr(new type[size])
191
+ { }
192
+ template <typename index, typename value>
193
+ auto_array_ptr(index const size, value const val)
194
+ : ptr(new type[size])
195
+ {
196
+ FILL_N(ptr, size, val);
197
+ }
198
+ ~auto_array_ptr() {
199
+ delete [] ptr; }
200
+ void free() {
201
+ delete [] ptr;
202
+ ptr = NULL;
203
+ }
204
+ template <typename index>
205
+ void init(index const size) {
206
+ ptr = new type [size];
207
+ }
208
+ template <typename index, typename value>
209
+ void init(index const size, value const val) {
210
+ init(size);
211
+ FILL_N(ptr, size, val);
212
+ }
213
+ inline operator type *() const { return ptr; }
214
+ };
215
+
216
+ struct node {
217
+ t_index node1, node2;
218
+ t_float dist;
219
+ };
220
+
221
+ inline bool operator< (const node a, const node b) {
222
+ return (a.dist < b.dist);
223
+ }
224
+
225
+ class cluster_result {
226
+ private:
227
+ auto_array_ptr<node> Z;
228
+ t_index pos;
229
+
230
+ public:
231
+ cluster_result(const t_index size)
232
+ : Z(size)
233
+ , pos(0)
234
+ {}
235
+
236
+ void append(const t_index node1, const t_index node2, const t_float dist) {
237
+ Z[pos].node1 = node1;
238
+ Z[pos].node2 = node2;
239
+ Z[pos].dist = dist;
240
+ ++pos;
241
+ }
242
+
243
+ node * operator[] (const t_index idx) const { return Z + idx; }
244
+
245
+ /* Define several methods to postprocess the distances. All these functions
246
+ are monotone, so they do not change the sorted order of distances. */
247
+
248
+ void sqrt() const {
249
+ for (node * ZZ=Z; ZZ!=Z+pos; ++ZZ) {
250
+ ZZ->dist = std::sqrt(ZZ->dist);
251
+ }
252
+ }
253
+
254
+ void sqrt(const t_float) const { // ignore the argument
255
+ sqrt();
256
+ }
257
+
258
+ void sqrtdouble(const t_float) const { // ignore the argument
259
+ for (node * ZZ=Z; ZZ!=Z+pos; ++ZZ) {
260
+ ZZ->dist = std::sqrt(2*ZZ->dist);
261
+ }
262
+ }
263
+
264
+ #ifdef R_pow
265
+ #define my_pow R_pow
266
+ #else
267
+ #define my_pow std::pow
268
+ #endif
269
+
270
+ void power(const t_float p) const {
271
+ t_float const q = 1/p;
272
+ for (node * ZZ=Z; ZZ!=Z+pos; ++ZZ) {
273
+ ZZ->dist = my_pow(ZZ->dist,q);
274
+ }
275
+ }
276
+
277
+ void plusone(const t_float) const { // ignore the argument
278
+ for (node * ZZ=Z; ZZ!=Z+pos; ++ZZ) {
279
+ ZZ->dist += 1;
280
+ }
281
+ }
282
+
283
+ void divide(const t_float denom) const {
284
+ for (node * ZZ=Z; ZZ!=Z+pos; ++ZZ) {
285
+ ZZ->dist /= denom;
286
+ }
287
+ }
288
+ };
289
+
290
+ class doubly_linked_list {
291
+ /*
292
+ Class for a doubly linked list. Initially, the list is the integer range
293
+ [0, size]. We provide a forward iterator and a method to delete an index
294
+ from the list.
295
+
296
+ Typical use: for (i=L.start; L<size; i=L.succ[I])
297
+ or
298
+ for (i=somevalue; L<size; i=L.succ[I])
299
+ */
300
+ public:
301
+ t_index start;
302
+ auto_array_ptr<t_index> succ;
303
+
304
+ private:
305
+ auto_array_ptr<t_index> pred;
306
+ // Not necessarily private, we just do not need it in this instance.
307
+
308
+ public:
309
+ doubly_linked_list(const t_index size)
310
+ // Initialize to the given size.
311
+ : start(0)
312
+ , succ(size+1)
313
+ , pred(size+1)
314
+ {
315
+ for (t_index i=0; i<size; ++i) {
316
+ pred[i+1] = i;
317
+ succ[i] = i+1;
318
+ }
319
+ // pred[0] is never accessed!
320
+ //succ[size] is never accessed!
321
+ }
322
+
323
+ ~doubly_linked_list() {}
324
+
325
+ void remove(const t_index idx) {
326
+ // Remove an index from the list.
327
+ if (idx==start) {
328
+ start = succ[idx];
329
+ }
330
+ else {
331
+ succ[pred[idx]] = succ[idx];
332
+ pred[succ[idx]] = pred[idx];
333
+ }
334
+ succ[idx] = 0; // Mark as inactive
335
+ }
336
+
337
+ bool is_inactive(t_index idx) const {
338
+ return (succ[idx]==0);
339
+ }
340
+ };
341
+
342
+ // Indexing functions
343
+ // D is the upper triangular part of a symmetric (NxN)-matrix
344
+ // We require r_ < c_ !
345
+ #define D_(r_,c_) ( D[(static_cast<std::ptrdiff_t>(2*N-3-(r_))*(r_)>>1)+(c_)-1] )
346
+ // Z is an ((N-1)x4)-array
347
+ #define Z_(_r, _c) (Z[(_r)*4 + (_c)])
348
+
349
+ /*
350
+ Lookup function for a union-find data structure.
351
+
352
+ The function finds the root of idx by going iteratively through all
353
+ parent elements until a root is found. An element i is a root if
354
+ nodes[i] is zero. To make subsequent searches faster, the entry for
355
+ idx and all its parents is updated with the root element.
356
+ */
357
+ class union_find {
358
+ private:
359
+ auto_array_ptr<t_index> parent;
360
+ t_index nextparent;
361
+
362
+ public:
363
+ union_find(const t_index size)
364
+ : parent(size>0 ? 2*size-1 : 0, 0)
365
+ , nextparent(size)
366
+ { }
367
+
368
+ t_index Find (t_index idx) const {
369
+ if (parent[idx] != 0 ) { // a → b
370
+ t_index p = idx;
371
+ idx = parent[idx];
372
+ if (parent[idx] != 0 ) { // a → b → c
373
+ do {
374
+ idx = parent[idx];
375
+ } while (parent[idx] != 0);
376
+ do {
377
+ t_index tmp = parent[p];
378
+ parent[p] = idx;
379
+ p = tmp;
380
+ } while (parent[p] != idx);
381
+ }
382
+ }
383
+ return idx;
384
+ }
385
+
386
+ void Union (const t_index node1, const t_index node2) {
387
+ parent[node1] = parent[node2] = nextparent++;
388
+ }
389
+ };
390
+
391
+ class nan_error{};
392
+ #ifdef FE_INVALID
393
+ class fenv_error{};
394
+ #endif
395
+
396
+ static void MST_linkage_core(const t_index N, const t_float * const D,
397
+ cluster_result & Z2) {
398
+ /*
399
+ N: integer, number of data points
400
+ D: condensed distance matrix N*(N-1)/2
401
+ Z2: output data structure
402
+
403
+ The basis of this algorithm is an algorithm by Rohlf:
404
+
405
+ F. James Rohlf, Hierarchical clustering using the minimum spanning tree,
406
+ The Computer Journal, vol. 16, 1973, p. 93–95.
407
+ */
408
+ t_index i;
409
+ t_index idx2;
410
+ doubly_linked_list active_nodes(N);
411
+ auto_array_ptr<t_float> d(N);
412
+
413
+ t_index prev_node;
414
+ t_float min;
415
+
416
+ // first iteration
417
+ idx2 = 1;
418
+ min = std::numeric_limits<t_float>::infinity();
419
+ for (i=1; i<N; ++i) {
420
+ d[i] = D[i-1];
421
+ #if HAVE_DIAGNOSTIC
422
+ #pragma GCC diagnostic push
423
+ #pragma GCC diagnostic ignored "-Wfloat-equal"
424
+ #endif
425
+ if (d[i] < min) {
426
+ min = d[i];
427
+ idx2 = i;
428
+ }
429
+ else if (fc_isnan(d[i]))
430
+ assert(false && "fastcluster: nan error");
431
+ #if HAVE_DIAGNOSTIC
432
+ #pragma GCC diagnostic pop
433
+ #endif
434
+ }
435
+ Z2.append(0, idx2, min);
436
+
437
+ for (t_index j=1; j<N-1; ++j) {
438
+ prev_node = idx2;
439
+ active_nodes.remove(prev_node);
440
+
441
+ idx2 = active_nodes.succ[0];
442
+ min = d[idx2];
443
+ for (i=idx2; i<prev_node; i=active_nodes.succ[i]) {
444
+ t_float tmp = D_(i, prev_node);
445
+ #if HAVE_DIAGNOSTIC
446
+ #pragma GCC diagnostic push
447
+ #pragma GCC diagnostic ignored "-Wfloat-equal"
448
+ #endif
449
+ if (tmp < d[i])
450
+ d[i] = tmp;
451
+ else if (fc_isnan(tmp))
452
+ assert(false && "fastcluster: nan error");
453
+ #if HAVE_DIAGNOSTIC
454
+ #pragma GCC diagnostic pop
455
+ #endif
456
+ if (d[i] < min) {
457
+ min = d[i];
458
+ idx2 = i;
459
+ }
460
+ }
461
+ for (; i<N; i=active_nodes.succ[i]) {
462
+ t_float tmp = D_(prev_node, i);
463
+ #if HAVE_DIAGNOSTIC
464
+ #pragma GCC diagnostic push
465
+ #pragma GCC diagnostic ignored "-Wfloat-equal"
466
+ #endif
467
+ if (d[i] > tmp)
468
+ d[i] = tmp;
469
+ else if (fc_isnan(tmp))
470
+ assert(false && "fastcluster: nan error");
471
+ #if HAVE_DIAGNOSTIC
472
+ #pragma GCC diagnostic pop
473
+ #endif
474
+ if (d[i] < min) {
475
+ min = d[i];
476
+ idx2 = i;
477
+ }
478
+ }
479
+ Z2.append(prev_node, idx2, min);
480
+ }
481
+ }
482
+
483
+ /* Functions for the update of the dissimilarity array */
484
+
485
+ inline static void f_single( t_float * const b, const t_float a ) {
486
+ if (*b > a) *b = a;
487
+ }
488
+ inline static void f_complete( t_float * const b, const t_float a ) {
489
+ if (*b < a) *b = a;
490
+ }
491
+ inline static void f_average( t_float * const b, const t_float a, const t_float s, const t_float t) {
492
+ *b = s*a + t*(*b);
493
+ #ifndef FE_INVALID
494
+ #if HAVE_DIAGNOSTIC
495
+ #pragma GCC diagnostic push
496
+ #pragma GCC diagnostic ignored "-Wfloat-equal"
497
+ #endif
498
+ if (fc_isnan(*b)) {
499
+ assert(false && "fastcluster: nan error");
500
+ }
501
+ #if HAVE_DIAGNOSTIC
502
+ #pragma GCC diagnostic pop
503
+ #endif
504
+ #endif
505
+ }
506
+ inline static void f_weighted( t_float * const b, const t_float a) {
507
+ *b = (a+*b)*.5;
508
+ #ifndef FE_INVALID
509
+ #if HAVE_DIAGNOSTIC
510
+ #pragma GCC diagnostic push
511
+ #pragma GCC diagnostic ignored "-Wfloat-equal"
512
+ #endif
513
+ if (fc_isnan(*b)) {
514
+ assert(false && "fastcluster: nan error");
515
+ }
516
+ #if HAVE_DIAGNOSTIC
517
+ #pragma GCC diagnostic pop
518
+ #endif
519
+ #endif
520
+ }
521
+ inline static void f_ward( t_float * const b, const t_float a, const t_float c, const t_float s, const t_float t, const t_float v) {
522
+ *b = ( (v+s)*a - v*c + (v+t)*(*b) ) / (s+t+v);
523
+ //*b = a+(*b)-(t*a+s*(*b)+v*c)/(s+t+v);
524
+ #ifndef FE_INVALID
525
+ #if HAVE_DIAGNOSTIC
526
+ #pragma GCC diagnostic push
527
+ #pragma GCC diagnostic ignored "-Wfloat-equal"
528
+ #endif
529
+ if (fc_isnan(*b)) {
530
+ assert(false && "fastcluster: nan error");
531
+ }
532
+ #if HAVE_DIAGNOSTIC
533
+ #pragma GCC diagnostic pop
534
+ #endif
535
+ #endif
536
+ }
537
+ inline static void f_centroid( t_float * const b, const t_float a, const t_float stc, const t_float s, const t_float t) {
538
+ *b = s*a - stc + t*(*b);
539
+ #ifndef FE_INVALID
540
+ if (fc_isnan(*b)) {
541
+ assert(false && "fastcluster: nan error");
542
+ }
543
+ #if HAVE_DIAGNOSTIC
544
+ #pragma GCC diagnostic pop
545
+ #endif
546
+ #endif
547
+ }
548
+ inline static void f_median( t_float * const b, const t_float a, const t_float c_4) {
549
+ *b = (a+(*b))*.5 - c_4;
550
+ #ifndef FE_INVALID
551
+ #if HAVE_DIAGNOSTIC
552
+ #pragma GCC diagnostic push
553
+ #pragma GCC diagnostic ignored "-Wfloat-equal"
554
+ #endif
555
+ if (fc_isnan(*b)) {
556
+ assert(false && "fastcluster: nan error");
557
+ }
558
+ #if HAVE_DIAGNOSTIC
559
+ #pragma GCC diagnostic pop
560
+ #endif
561
+ #endif
562
+ }
563
+
564
+ template <method_codes method, typename t_members>
565
+ static void NN_chain_core(const t_index N, t_float * const D, t_members * const members, cluster_result & Z2) {
566
+ /*
567
+ N: integer
568
+ D: condensed distance matrix N*(N-1)/2
569
+ Z2: output data structure
570
+
571
+ This is the NN-chain algorithm, described on page 86 in the following book:
572
+
573
+ Fionn Murtagh, Multidimensional Clustering Algorithms,
574
+ Vienna, Würzburg: Physica-Verlag, 1985.
575
+ */
576
+ t_index i;
577
+
578
+ auto_array_ptr<t_index> NN_chain(N);
579
+ t_index NN_chain_tip = 0;
580
+
581
+ t_index idx1, idx2;
582
+
583
+ t_float size1, size2;
584
+ doubly_linked_list active_nodes(N);
585
+
586
+ t_float min;
587
+
588
+ for (t_float const * DD=D; DD!=D+(static_cast<std::ptrdiff_t>(N)*(N-1)>>1);
589
+ ++DD) {
590
+ #if HAVE_DIAGNOSTIC
591
+ #pragma GCC diagnostic push
592
+ #pragma GCC diagnostic ignored "-Wfloat-equal"
593
+ #endif
594
+ if (fc_isnan(*DD)) {
595
+ assert(false && "fastcluster: nan error");
596
+ }
597
+ #if HAVE_DIAGNOSTIC
598
+ #pragma GCC diagnostic pop
599
+ #endif
600
+ }
601
+
602
+ #ifdef FE_INVALID
603
+ if (feclearexcept(FE_INVALID)) assert(false && "fastcluster: fenv error");
604
+ #endif
605
+
606
+ for (t_index j=0; j<N-1; ++j) {
607
+ if (NN_chain_tip <= 3) {
608
+ NN_chain[0] = idx1 = active_nodes.start;
609
+ NN_chain_tip = 1;
610
+
611
+ idx2 = active_nodes.succ[idx1];
612
+ min = D_(idx1,idx2);
613
+ for (i=active_nodes.succ[idx2]; i<N; i=active_nodes.succ[i]) {
614
+ if (D_(idx1,i) < min) {
615
+ min = D_(idx1,i);
616
+ idx2 = i;
617
+ }
618
+ }
619
+ } // a: idx1 b: idx2
620
+ else {
621
+ NN_chain_tip -= 3;
622
+ idx1 = NN_chain[NN_chain_tip-1];
623
+ idx2 = NN_chain[NN_chain_tip];
624
+ min = idx1<idx2 ? D_(idx1,idx2) : D_(idx2,idx1);
625
+ } // a: idx1 b: idx2
626
+
627
+ do {
628
+ NN_chain[NN_chain_tip] = idx2;
629
+
630
+ for (i=active_nodes.start; i<idx2; i=active_nodes.succ[i]) {
631
+ if (D_(i,idx2) < min) {
632
+ min = D_(i,idx2);
633
+ idx1 = i;
634
+ }
635
+ }
636
+ for (i=active_nodes.succ[idx2]; i<N; i=active_nodes.succ[i]) {
637
+ if (D_(idx2,i) < min) {
638
+ min = D_(idx2,i);
639
+ idx1 = i;
640
+ }
641
+ }
642
+
643
+ idx2 = idx1;
644
+ idx1 = NN_chain[NN_chain_tip++];
645
+
646
+ } while (idx2 != NN_chain[NN_chain_tip-2]);
647
+
648
+ Z2.append(idx1, idx2, min);
649
+
650
+ if (idx1>idx2) {
651
+ t_index tmp = idx1;
652
+ idx1 = idx2;
653
+ idx2 = tmp;
654
+ }
655
+
656
+ if (method==METHOD_METR_AVERAGE ||
657
+ method==METHOD_METR_WARD) {
658
+ size1 = static_cast<t_float>(members[idx1]);
659
+ size2 = static_cast<t_float>(members[idx2]);
660
+ members[idx2] += members[idx1];
661
+ }
662
+
663
+ // Remove the smaller index from the valid indices (active_nodes).
664
+ active_nodes.remove(idx1);
665
+
666
+ switch (method) {
667
+ case METHOD_METR_SINGLE:
668
+ /*
669
+ Single linkage.
670
+
671
+ Characteristic: new distances are never longer than the old distances.
672
+ */
673
+ // Update the distance matrix in the range [start, idx1).
674
+ for (i=active_nodes.start; i<idx1; i=active_nodes.succ[i])
675
+ f_single(&D_(i, idx2), D_(i, idx1) );
676
+ // Update the distance matrix in the range (idx1, idx2).
677
+ for (; i<idx2; i=active_nodes.succ[i])
678
+ f_single(&D_(i, idx2), D_(idx1, i) );
679
+ // Update the distance matrix in the range (idx2, N).
680
+ for (i=active_nodes.succ[idx2]; i<N; i=active_nodes.succ[i])
681
+ f_single(&D_(idx2, i), D_(idx1, i) );
682
+ break;
683
+
684
+ case METHOD_METR_COMPLETE:
685
+ /*
686
+ Complete linkage.
687
+
688
+ Characteristic: new distances are never shorter than the old distances.
689
+ */
690
+ // Update the distance matrix in the range [start, idx1).
691
+ for (i=active_nodes.start; i<idx1; i=active_nodes.succ[i])
692
+ f_complete(&D_(i, idx2), D_(i, idx1) );
693
+ // Update the distance matrix in the range (idx1, idx2).
694
+ for (; i<idx2; i=active_nodes.succ[i])
695
+ f_complete(&D_(i, idx2), D_(idx1, i) );
696
+ // Update the distance matrix in the range (idx2, N).
697
+ for (i=active_nodes.succ[idx2]; i<N; i=active_nodes.succ[i])
698
+ f_complete(&D_(idx2, i), D_(idx1, i) );
699
+ break;
700
+
701
+ case METHOD_METR_AVERAGE: {
702
+ /*
703
+ Average linkage.
704
+
705
+ Shorter and longer distances can occur.
706
+ */
707
+ // Update the distance matrix in the range [start, idx1).
708
+ t_float s = size1/(size1+size2);
709
+ t_float t = size2/(size1+size2);
710
+ for (i=active_nodes.start; i<idx1; i=active_nodes.succ[i])
711
+ f_average(&D_(i, idx2), D_(i, idx1), s, t );
712
+ // Update the distance matrix in the range (idx1, idx2).
713
+ for (; i<idx2; i=active_nodes.succ[i])
714
+ f_average(&D_(i, idx2), D_(idx1, i), s, t );
715
+ // Update the distance matrix in the range (idx2, N).
716
+ for (i=active_nodes.succ[idx2]; i<N; i=active_nodes.succ[i])
717
+ f_average(&D_(idx2, i), D_(idx1, i), s, t );
718
+ break;
719
+ }
720
+
721
+ case METHOD_METR_WEIGHTED:
722
+ /*
723
+ Weighted linkage.
724
+
725
+ Shorter and longer distances can occur.
726
+ */
727
+ // Update the distance matrix in the range [start, idx1).
728
+ for (i=active_nodes.start; i<idx1; i=active_nodes.succ[i])
729
+ f_weighted(&D_(i, idx2), D_(i, idx1) );
730
+ // Update the distance matrix in the range (idx1, idx2).
731
+ for (; i<idx2; i=active_nodes.succ[i])
732
+ f_weighted(&D_(i, idx2), D_(idx1, i) );
733
+ // Update the distance matrix in the range (idx2, N).
734
+ for (i=active_nodes.succ[idx2]; i<N; i=active_nodes.succ[i])
735
+ f_weighted(&D_(idx2, i), D_(idx1, i) );
736
+ break;
737
+
738
+ case METHOD_METR_WARD:
739
+ /*
740
+ Ward linkage.
741
+
742
+ Shorter and longer distances can occur, not smaller than min(d1,d2)
743
+ but maybe bigger than max(d1,d2).
744
+ */
745
+ // Update the distance matrix in the range [start, idx1).
746
+ //t_float v = static_cast<t_float>(members[i]);
747
+ for (i=active_nodes.start; i<idx1; i=active_nodes.succ[i])
748
+ f_ward(&D_(i, idx2), D_(i, idx1), min,
749
+ size1, size2, static_cast<t_float>(members[i]) );
750
+ // Update the distance matrix in the range (idx1, idx2).
751
+ for (; i<idx2; i=active_nodes.succ[i])
752
+ f_ward(&D_(i, idx2), D_(idx1, i), min,
753
+ size1, size2, static_cast<t_float>(members[i]) );
754
+ // Update the distance matrix in the range (idx2, N).
755
+ for (i=active_nodes.succ[idx2]; i<N; i=active_nodes.succ[i])
756
+ f_ward(&D_(idx2, i), D_(idx1, i), min,
757
+ size1, size2, static_cast<t_float>(members[i]) );
758
+ break;
759
+
760
+ default:
761
+ assert(false && "fastcluster: invalid method");
762
+ }
763
+ }
764
+ #ifdef FE_INVALID
765
+ if (fetestexcept(FE_INVALID)) assert(false && "fastcluster: fenv error");
766
+ #endif
767
+ }
768
+
769
+ class binary_min_heap {
770
+ /*
771
+ Class for a binary min-heap. The data resides in an array A. The elements of
772
+ A are not changed but two lists I and R of indices are generated which point
773
+ to elements of A and backwards.
774
+
775
+ The heap tree structure is
776
+
777
+ H[2*i+1] H[2*i+2]
778
+ \ /
779
+ \ /
780
+ ≤ ≤
781
+ \ /
782
+ \ /
783
+ H[i]
784
+
785
+ where the children must be less or equal than their parent. Thus, H[0]
786
+ contains the minimum. The lists I and R are made such that H[i] = A[I[i]]
787
+ and R[I[i]] = i.
788
+
789
+ This implementation is not designed to handle NaN values.
790
+ */
791
+ private:
792
+ t_float * const A;
793
+ t_index size;
794
+ auto_array_ptr<t_index> I;
795
+ auto_array_ptr<t_index> R;
796
+
797
+ // no default constructor
798
+ binary_min_heap();
799
+ // noncopyable
800
+ binary_min_heap(binary_min_heap const &);
801
+ binary_min_heap & operator=(binary_min_heap const &);
802
+
803
+ public:
804
+ binary_min_heap(t_float * const A_, const t_index size_)
805
+ : A(A_), size(size_), I(size), R(size)
806
+ { // Allocate memory and initialize the lists I and R to the identity. This
807
+ // does not make it a heap. Call heapify afterwards!
808
+ for (t_index i=0; i<size; ++i)
809
+ R[i] = I[i] = i;
810
+ }
811
+
812
+ binary_min_heap(t_float * const A_, const t_index size1, const t_index size2,
813
+ const t_index start)
814
+ : A(A_), size(size1), I(size1), R(size2)
815
+ { // Allocate memory and initialize the lists I and R to the identity. This
816
+ // does not make it a heap. Call heapify afterwards!
817
+ for (t_index i=0; i<size; ++i) {
818
+ R[i+start] = i;
819
+ I[i] = i + start;
820
+ }
821
+ }
822
+
823
+ ~binary_min_heap() {}
824
+
825
+ void heapify() {
826
+ // Arrange the indices I and R so that H[i] := A[I[i]] satisfies the heap
827
+ // condition H[i] < H[2*i+1] and H[i] < H[2*i+2] for each i.
828
+ //
829
+ // Complexity: Θ(size)
830
+ // Reference: Cormen, Leiserson, Rivest, Stein, Introduction to Algorithms,
831
+ // 3rd ed., 2009, Section 6.3 “Building a heap”
832
+ t_index idx;
833
+ for (idx=(size>>1); idx>0; ) {
834
+ --idx;
835
+ update_geq_(idx);
836
+ }
837
+ }
838
+
839
+ inline t_index argmin() const {
840
+ // Return the minimal element.
841
+ return I[0];
842
+ }
843
+
844
+ void heap_pop() {
845
+ // Remove the minimal element from the heap.
846
+ --size;
847
+ I[0] = I[size];
848
+ R[I[0]] = 0;
849
+ update_geq_(0);
850
+ }
851
+
852
+ void remove(t_index idx) {
853
+ // Remove an element from the heap.
854
+ --size;
855
+ R[I[size]] = R[idx];
856
+ I[R[idx]] = I[size];
857
+ if ( H(size)<=A[idx] ) {
858
+ update_leq_(R[idx]);
859
+ }
860
+ else {
861
+ update_geq_(R[idx]);
862
+ }
863
+ }
864
+
865
+ void replace ( const t_index idxold, const t_index idxnew,
866
+ const t_float val) {
867
+ R[idxnew] = R[idxold];
868
+ I[R[idxnew]] = idxnew;
869
+ if (val<=A[idxold])
870
+ update_leq(idxnew, val);
871
+ else
872
+ update_geq(idxnew, val);
873
+ }
874
+
875
+ void update ( const t_index idx, const t_float val ) const {
876
+ // Update the element A[i] with val and re-arrange the indices to preserve
877
+ // the heap condition.
878
+ if (val<=A[idx])
879
+ update_leq(idx, val);
880
+ else
881
+ update_geq(idx, val);
882
+ }
883
+
884
+ void update_leq ( const t_index idx, const t_float val ) const {
885
+ // Use this when the new value is not more than the old value.
886
+ A[idx] = val;
887
+ update_leq_(R[idx]);
888
+ }
889
+
890
+ void update_geq ( const t_index idx, const t_float val ) const {
891
+ // Use this when the new value is not less than the old value.
892
+ A[idx] = val;
893
+ update_geq_(R[idx]);
894
+ }
895
+
896
+ private:
897
+ void update_leq_ (t_index i) const {
898
+ t_index j;
899
+ for ( ; (i>0) && ( H(i)<H(j=(i-1)>>1) ); i=j)
900
+ heap_swap(i,j);
901
+ }
902
+
903
+ void update_geq_ (t_index i) const {
904
+ t_index j;
905
+ for ( ; (j=2*i+1)<size; i=j) {
906
+ if ( H(j)>=H(i) ) {
907
+ ++j;
908
+ if ( j>=size || H(j)>=H(i) ) break;
909
+ }
910
+ else if ( j+1<size && H(j+1)<H(j) ) ++j;
911
+ heap_swap(i, j);
912
+ }
913
+ }
914
+
915
+ void heap_swap(const t_index i, const t_index j) const {
916
+ // Swap two indices.
917
+ t_index tmp = I[i];
918
+ I[i] = I[j];
919
+ I[j] = tmp;
920
+ R[I[i]] = i;
921
+ R[I[j]] = j;
922
+ }
923
+
924
+ inline t_float H(const t_index i) const {
925
+ return A[I[i]];
926
+ }
927
+
928
+ };
929
+
930
+ template <method_codes method, typename t_members>
931
+ static void generic_linkage(const t_index N, t_float * const D, t_members * const members, cluster_result & Z2) {
932
+ /*
933
+ N: integer, number of data points
934
+ D: condensed distance matrix N*(N-1)/2
935
+ Z2: output data structure
936
+ */
937
+
938
+ const t_index N_1 = N-1;
939
+ t_index i, j; // loop variables
940
+ t_index idx1, idx2; // row and column indices
941
+
942
+ auto_array_ptr<t_index> n_nghbr(N_1); // array of nearest neighbors
943
+ auto_array_ptr<t_float> mindist(N_1); // distances to the nearest neighbors
944
+ auto_array_ptr<t_index> row_repr(N); // row_repr[i]: node number that the
945
+ // i-th row represents
946
+ doubly_linked_list active_nodes(N);
947
+ binary_min_heap nn_distances(&*mindist, N_1); // minimum heap structure for
948
+ // the distance to the nearest neighbor of each point
949
+ t_index node1, node2; // node numbers in the output
950
+ t_float size1, size2; // and their cardinalities
951
+
952
+ t_float min; // minimum and row index for nearest-neighbor search
953
+ t_index idx;
954
+
955
+ for (i=0; i<N; ++i)
956
+ // Build a list of row ↔ node label assignments.
957
+ // Initially i ↦ i
958
+ row_repr[i] = i;
959
+
960
+ // Initialize the minimal distances:
961
+ // Find the nearest neighbor of each point.
962
+ // n_nghbr[i] = argmin_{j>i} D(i,j) for i in range(N-1)
963
+ t_float const * DD = D;
964
+ for (i=0; i<N_1; ++i) {
965
+ min = std::numeric_limits<t_float>::infinity();
966
+ for (idx=j=i+1; j<N; ++j, ++DD) {
967
+ #if HAVE_DIAGNOSTIC
968
+ #pragma GCC diagnostic push
969
+ #pragma GCC diagnostic ignored "-Wfloat-equal"
970
+ #endif
971
+ if (*DD<min) {
972
+ min = *DD;
973
+ idx = j;
974
+ }
975
+ else if (fc_isnan(*DD))
976
+ assert(false && "fastcluster: nan error");
977
+ }
978
+ #if HAVE_DIAGNOSTIC
979
+ #pragma GCC diagnostic pop
980
+ #endif
981
+ mindist[i] = min;
982
+ n_nghbr[i] = idx;
983
+ }
984
+
985
+ // Put the minimal distances into a heap structure to make the repeated
986
+ // global minimum searches fast.
987
+ nn_distances.heapify();
988
+
989
+ #ifdef FE_INVALID
990
+ if (feclearexcept(FE_INVALID)) assert(false && "fastcluster: fenv error");
991
+ #endif
992
+
993
+ // Main loop: We have N-1 merging steps.
994
+ for (i=0; i<N_1; ++i) {
995
+ /*
996
+ Here is a special feature that allows fast bookkeeping and updates of the
997
+ minimal distances.
998
+
999
+ mindist[i] stores a lower bound on the minimum distance of the point i to
1000
+ all points of higher index:
1001
+
1002
+ mindist[i] ≥ min_{j>i} D(i,j)
1003
+
1004
+ Normally, we have equality. However, this minimum may become invalid due
1005
+ to the updates in the distance matrix. The rules are:
1006
+
1007
+ 1) If mindist[i] is equal to D(i, n_nghbr[i]), this is the correct
1008
+ minimum and n_nghbr[i] is a nearest neighbor.
1009
+
1010
+ 2) If mindist[i] is smaller than D(i, n_nghbr[i]), this might not be the
1011
+ correct minimum. The minimum needs to be recomputed.
1012
+
1013
+ 3) mindist[i] is never bigger than the true minimum. Hence, we never
1014
+ miss the true minimum if we take the smallest mindist entry,
1015
+ re-compute the value if necessary (thus maybe increasing it) and
1016
+ looking for the now smallest mindist entry until a valid minimal
1017
+ entry is found. This step is done in the lines below.
1018
+
1019
+ The update process for D below takes care that these rules are
1020
+ fulfilled. This makes sure that the minima in the rows D(i,i+1:)of D are
1021
+ re-calculated when necessary but re-calculation is avoided whenever
1022
+ possible.
1023
+
1024
+ The re-calculation of the minima makes the worst-case runtime of this
1025
+ algorithm cubic in N. We avoid this whenever possible, and in most cases
1026
+ the runtime appears to be quadratic.
1027
+ */
1028
+ idx1 = nn_distances.argmin();
1029
+ if (method != METHOD_METR_SINGLE) {
1030
+ while ( mindist[idx1] < D_(idx1, n_nghbr[idx1]) ) {
1031
+ // Recompute the minimum mindist[idx1] and n_nghbr[idx1].
1032
+ n_nghbr[idx1] = j = active_nodes.succ[idx1]; // exists, maximally N-1
1033
+ min = D_(idx1,j);
1034
+ for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
1035
+ if (D_(idx1,j)<min) {
1036
+ min = D_(idx1,j);
1037
+ n_nghbr[idx1] = j;
1038
+ }
1039
+ }
1040
+ /* Update the heap with the new true minimum and search for the
1041
+ (possibly different) minimal entry. */
1042
+ nn_distances.update_geq(idx1, min);
1043
+ idx1 = nn_distances.argmin();
1044
+ }
1045
+ }
1046
+
1047
+ nn_distances.heap_pop(); // Remove the current minimum from the heap.
1048
+ idx2 = n_nghbr[idx1];
1049
+
1050
+ // Write the newly found minimal pair of nodes to the output array.
1051
+ node1 = row_repr[idx1];
1052
+ node2 = row_repr[idx2];
1053
+
1054
+ if (method==METHOD_METR_AVERAGE ||
1055
+ method==METHOD_METR_WARD ||
1056
+ method==METHOD_METR_CENTROID) {
1057
+ size1 = static_cast<t_float>(members[idx1]);
1058
+ size2 = static_cast<t_float>(members[idx2]);
1059
+ members[idx2] += members[idx1];
1060
+ }
1061
+ Z2.append(node1, node2, mindist[idx1]);
1062
+
1063
+ // Remove idx1 from the list of active indices (active_nodes).
1064
+ active_nodes.remove(idx1);
1065
+ // Index idx2 now represents the new (merged) node with label N+i.
1066
+ row_repr[idx2] = N+i;
1067
+
1068
+ // Update the distance matrix
1069
+ switch (method) {
1070
+ case METHOD_METR_SINGLE:
1071
+ /*
1072
+ Single linkage.
1073
+
1074
+ Characteristic: new distances are never longer than the old distances.
1075
+ */
1076
+ // Update the distance matrix in the range [start, idx1).
1077
+ for (j=active_nodes.start; j<idx1; j=active_nodes.succ[j]) {
1078
+ f_single(&D_(j, idx2), D_(j, idx1));
1079
+ if (n_nghbr[j] == idx1)
1080
+ n_nghbr[j] = idx2;
1081
+ }
1082
+ // Update the distance matrix in the range (idx1, idx2).
1083
+ for (; j<idx2; j=active_nodes.succ[j]) {
1084
+ f_single(&D_(j, idx2), D_(idx1, j));
1085
+ // If the new value is below the old minimum in a row, update
1086
+ // the mindist and n_nghbr arrays.
1087
+ if (D_(j, idx2) < mindist[j]) {
1088
+ nn_distances.update_leq(j, D_(j, idx2));
1089
+ n_nghbr[j] = idx2;
1090
+ }
1091
+ }
1092
+ // Update the distance matrix in the range (idx2, N).
1093
+ // Recompute the minimum mindist[idx2] and n_nghbr[idx2].
1094
+ if (idx2<N_1) {
1095
+ min = mindist[idx2];
1096
+ for (j=active_nodes.succ[idx2]; j<N; j=active_nodes.succ[j]) {
1097
+ f_single(&D_(idx2, j), D_(idx1, j) );
1098
+ if (D_(idx2, j) < min) {
1099
+ n_nghbr[idx2] = j;
1100
+ min = D_(idx2, j);
1101
+ }
1102
+ }
1103
+ nn_distances.update_leq(idx2, min);
1104
+ }
1105
+ break;
1106
+
1107
+ case METHOD_METR_COMPLETE:
1108
+ /*
1109
+ Complete linkage.
1110
+
1111
+ Characteristic: new distances are never shorter than the old distances.
1112
+ */
1113
+ // Update the distance matrix in the range [start, idx1).
1114
+ for (j=active_nodes.start; j<idx1; j=active_nodes.succ[j]) {
1115
+ f_complete(&D_(j, idx2), D_(j, idx1) );
1116
+ if (n_nghbr[j] == idx1)
1117
+ n_nghbr[j] = idx2;
1118
+ }
1119
+ // Update the distance matrix in the range (idx1, idx2).
1120
+ for (; j<idx2; j=active_nodes.succ[j])
1121
+ f_complete(&D_(j, idx2), D_(idx1, j) );
1122
+ // Update the distance matrix in the range (idx2, N).
1123
+ for (j=active_nodes.succ[idx2]; j<N; j=active_nodes.succ[j])
1124
+ f_complete(&D_(idx2, j), D_(idx1, j) );
1125
+ break;
1126
+
1127
+ case METHOD_METR_AVERAGE: {
1128
+ /*
1129
+ Average linkage.
1130
+
1131
+ Shorter and longer distances can occur.
1132
+ */
1133
+ // Update the distance matrix in the range [start, idx1).
1134
+ t_float s = size1/(size1+size2);
1135
+ t_float t = size2/(size1+size2);
1136
+ for (j=active_nodes.start; j<idx1; j=active_nodes.succ[j]) {
1137
+ f_average(&D_(j, idx2), D_(j, idx1), s, t);
1138
+ if (n_nghbr[j] == idx1)
1139
+ n_nghbr[j] = idx2;
1140
+ }
1141
+ // Update the distance matrix in the range (idx1, idx2).
1142
+ for (; j<idx2; j=active_nodes.succ[j]) {
1143
+ f_average(&D_(j, idx2), D_(idx1, j), s, t);
1144
+ if (D_(j, idx2) < mindist[j]) {
1145
+ nn_distances.update_leq(j, D_(j, idx2));
1146
+ n_nghbr[j] = idx2;
1147
+ }
1148
+ }
1149
+ // Update the distance matrix in the range (idx2, N).
1150
+ if (idx2<N_1) {
1151
+ n_nghbr[idx2] = j = active_nodes.succ[idx2]; // exists, maximally N-1
1152
+ f_average(&D_(idx2, j), D_(idx1, j), s, t);
1153
+ min = D_(idx2,j);
1154
+ for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
1155
+ f_average(&D_(idx2, j), D_(idx1, j), s, t);
1156
+ if (D_(idx2,j) < min) {
1157
+ min = D_(idx2,j);
1158
+ n_nghbr[idx2] = j;
1159
+ }
1160
+ }
1161
+ nn_distances.update(idx2, min);
1162
+ }
1163
+ break;
1164
+ }
1165
+
1166
+ case METHOD_METR_WEIGHTED:
1167
+ /*
1168
+ Weighted linkage.
1169
+
1170
+ Shorter and longer distances can occur.
1171
+ */
1172
+ // Update the distance matrix in the range [start, idx1).
1173
+ for (j=active_nodes.start; j<idx1; j=active_nodes.succ[j]) {
1174
+ f_weighted(&D_(j, idx2), D_(j, idx1) );
1175
+ if (n_nghbr[j] == idx1)
1176
+ n_nghbr[j] = idx2;
1177
+ }
1178
+ // Update the distance matrix in the range (idx1, idx2).
1179
+ for (; j<idx2; j=active_nodes.succ[j]) {
1180
+ f_weighted(&D_(j, idx2), D_(idx1, j) );
1181
+ if (D_(j, idx2) < mindist[j]) {
1182
+ nn_distances.update_leq(j, D_(j, idx2));
1183
+ n_nghbr[j] = idx2;
1184
+ }
1185
+ }
1186
+ // Update the distance matrix in the range (idx2, N).
1187
+ if (idx2<N_1) {
1188
+ n_nghbr[idx2] = j = active_nodes.succ[idx2]; // exists, maximally N-1
1189
+ f_weighted(&D_(idx2, j), D_(idx1, j) );
1190
+ min = D_(idx2,j);
1191
+ for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
1192
+ f_weighted(&D_(idx2, j), D_(idx1, j) );
1193
+ if (D_(idx2,j) < min) {
1194
+ min = D_(idx2,j);
1195
+ n_nghbr[idx2] = j;
1196
+ }
1197
+ }
1198
+ nn_distances.update(idx2, min);
1199
+ }
1200
+ break;
1201
+
1202
+ case METHOD_METR_WARD:
1203
+ /*
1204
+ Ward linkage.
1205
+
1206
+ Shorter and longer distances can occur, not smaller than min(d1,d2)
1207
+ but maybe bigger than max(d1,d2).
1208
+ */
1209
+ // Update the distance matrix in the range [start, idx1).
1210
+ for (j=active_nodes.start; j<idx1; j=active_nodes.succ[j]) {
1211
+ f_ward(&D_(j, idx2), D_(j, idx1), mindist[idx1],
1212
+ size1, size2, static_cast<t_float>(members[j]) );
1213
+ if (n_nghbr[j] == idx1)
1214
+ n_nghbr[j] = idx2;
1215
+ }
1216
+ // Update the distance matrix in the range (idx1, idx2).
1217
+ for (; j<idx2; j=active_nodes.succ[j]) {
1218
+ f_ward(&D_(j, idx2), D_(idx1, j), mindist[idx1], size1, size2,
1219
+ static_cast<t_float>(members[j]) );
1220
+ if (D_(j, idx2) < mindist[j]) {
1221
+ nn_distances.update_leq(j, D_(j, idx2));
1222
+ n_nghbr[j] = idx2;
1223
+ }
1224
+ }
1225
+ // Update the distance matrix in the range (idx2, N).
1226
+ if (idx2<N_1) {
1227
+ n_nghbr[idx2] = j = active_nodes.succ[idx2]; // exists, maximally N-1
1228
+ f_ward(&D_(idx2, j), D_(idx1, j), mindist[idx1],
1229
+ size1, size2, static_cast<t_float>(members[j]) );
1230
+ min = D_(idx2,j);
1231
+ for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
1232
+ f_ward(&D_(idx2, j), D_(idx1, j), mindist[idx1],
1233
+ size1, size2, static_cast<t_float>(members[j]) );
1234
+ if (D_(idx2,j) < min) {
1235
+ min = D_(idx2,j);
1236
+ n_nghbr[idx2] = j;
1237
+ }
1238
+ }
1239
+ nn_distances.update(idx2, min);
1240
+ }
1241
+ break;
1242
+
1243
+ case METHOD_METR_CENTROID: {
1244
+ /*
1245
+ Centroid linkage.
1246
+
1247
+ Shorter and longer distances can occur, not bigger than max(d1,d2)
1248
+ but maybe smaller than min(d1,d2).
1249
+ */
1250
+ // Update the distance matrix in the range [start, idx1).
1251
+ t_float s = size1/(size1+size2);
1252
+ t_float t = size2/(size1+size2);
1253
+ t_float stc = s*t*mindist[idx1];
1254
+ for (j=active_nodes.start; j<idx1; j=active_nodes.succ[j]) {
1255
+ f_centroid(&D_(j, idx2), D_(j, idx1), stc, s, t);
1256
+ if (D_(j, idx2) < mindist[j]) {
1257
+ nn_distances.update_leq(j, D_(j, idx2));
1258
+ n_nghbr[j] = idx2;
1259
+ }
1260
+ else if (n_nghbr[j] == idx1)
1261
+ n_nghbr[j] = idx2;
1262
+ }
1263
+ // Update the distance matrix in the range (idx1, idx2).
1264
+ for (; j<idx2; j=active_nodes.succ[j]) {
1265
+ f_centroid(&D_(j, idx2), D_(idx1, j), stc, s, t);
1266
+ if (D_(j, idx2) < mindist[j]) {
1267
+ nn_distances.update_leq(j, D_(j, idx2));
1268
+ n_nghbr[j] = idx2;
1269
+ }
1270
+ }
1271
+ // Update the distance matrix in the range (idx2, N).
1272
+ if (idx2<N_1) {
1273
+ n_nghbr[idx2] = j = active_nodes.succ[idx2]; // exists, maximally N-1
1274
+ f_centroid(&D_(idx2, j), D_(idx1, j), stc, s, t);
1275
+ min = D_(idx2,j);
1276
+ for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
1277
+ f_centroid(&D_(idx2, j), D_(idx1, j), stc, s, t);
1278
+ if (D_(idx2,j) < min) {
1279
+ min = D_(idx2,j);
1280
+ n_nghbr[idx2] = j;
1281
+ }
1282
+ }
1283
+ nn_distances.update(idx2, min);
1284
+ }
1285
+ break;
1286
+ }
1287
+
1288
+ case METHOD_METR_MEDIAN: {
1289
+ /*
1290
+ Median linkage.
1291
+
1292
+ Shorter and longer distances can occur, not bigger than max(d1,d2)
1293
+ but maybe smaller than min(d1,d2).
1294
+ */
1295
+ // Update the distance matrix in the range [start, idx1).
1296
+ t_float c_4 = mindist[idx1]*.25;
1297
+ for (j=active_nodes.start; j<idx1; j=active_nodes.succ[j]) {
1298
+ f_median(&D_(j, idx2), D_(j, idx1), c_4 );
1299
+ if (D_(j, idx2) < mindist[j]) {
1300
+ nn_distances.update_leq(j, D_(j, idx2));
1301
+ n_nghbr[j] = idx2;
1302
+ }
1303
+ else if (n_nghbr[j] == idx1)
1304
+ n_nghbr[j] = idx2;
1305
+ }
1306
+ // Update the distance matrix in the range (idx1, idx2).
1307
+ for (; j<idx2; j=active_nodes.succ[j]) {
1308
+ f_median(&D_(j, idx2), D_(idx1, j), c_4 );
1309
+ if (D_(j, idx2) < mindist[j]) {
1310
+ nn_distances.update_leq(j, D_(j, idx2));
1311
+ n_nghbr[j] = idx2;
1312
+ }
1313
+ }
1314
+ // Update the distance matrix in the range (idx2, N).
1315
+ if (idx2<N_1) {
1316
+ n_nghbr[idx2] = j = active_nodes.succ[idx2]; // exists, maximally N-1
1317
+ f_median(&D_(idx2, j), D_(idx1, j), c_4 );
1318
+ min = D_(idx2,j);
1319
+ for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
1320
+ f_median(&D_(idx2, j), D_(idx1, j), c_4 );
1321
+ if (D_(idx2,j) < min) {
1322
+ min = D_(idx2,j);
1323
+ n_nghbr[idx2] = j;
1324
+ }
1325
+ }
1326
+ nn_distances.update(idx2, min);
1327
+ }
1328
+ break;
1329
+ }
1330
+
1331
+ default:
1332
+ assert(false && "fastcluster: invalid method");
1333
+ }
1334
+ }
1335
+ #ifdef FE_INVALID
1336
+ if (fetestexcept(FE_INVALID)) assert(false && "fastcluster: fenv error");
1337
+ #endif
1338
+ }
1339
+
1340
+ /*
1341
+ Clustering methods for vector data
1342
+ */
1343
+
1344
+ template <typename t_dissimilarity>
1345
+ static void MST_linkage_core_vector(const t_index N,
1346
+ t_dissimilarity & dist,
1347
+ cluster_result & Z2) {
1348
+ /*
1349
+ N: integer, number of data points
1350
+ dist: function pointer to the metric
1351
+ Z2: output data structure
1352
+
1353
+ The basis of this algorithm is an algorithm by Rohlf:
1354
+
1355
+ F. James Rohlf, Hierarchical clustering using the minimum spanning tree,
1356
+ The Computer Journal, vol. 16, 1973, p. 93–95.
1357
+ */
1358
+ t_index i;
1359
+ t_index idx2;
1360
+ doubly_linked_list active_nodes(N);
1361
+ auto_array_ptr<t_float> d(N);
1362
+
1363
+ t_index prev_node;
1364
+ t_float min;
1365
+
1366
+ // first iteration
1367
+ idx2 = 1;
1368
+ min = std::numeric_limits<t_float>::infinity();
1369
+ for (i=1; i<N; ++i) {
1370
+ d[i] = dist(0,i);
1371
+ #if HAVE_DIAGNOSTIC
1372
+ #pragma GCC diagnostic push
1373
+ #pragma GCC diagnostic ignored "-Wfloat-equal"
1374
+ #endif
1375
+ if (d[i] < min) {
1376
+ min = d[i];
1377
+ idx2 = i;
1378
+ }
1379
+ else if (fc_isnan(d[i]))
1380
+ assert(false && "fastcluster: nan error");
1381
+ #if HAVE_DIAGNOSTIC
1382
+ #pragma GCC diagnostic pop
1383
+ #endif
1384
+ }
1385
+
1386
+ Z2.append(0, idx2, min);
1387
+
1388
+ for (t_index j=1; j<N-1; ++j) {
1389
+ prev_node = idx2;
1390
+ active_nodes.remove(prev_node);
1391
+
1392
+ idx2 = active_nodes.succ[0];
1393
+ min = d[idx2];
1394
+
1395
+ for (i=idx2; i<N; i=active_nodes.succ[i]) {
1396
+ t_float tmp = dist(i, prev_node);
1397
+ #if HAVE_DIAGNOSTIC
1398
+ #pragma GCC diagnostic push
1399
+ #pragma GCC diagnostic ignored "-Wfloat-equal"
1400
+ #endif
1401
+ if (d[i] > tmp)
1402
+ d[i] = tmp;
1403
+ else if (fc_isnan(tmp))
1404
+ assert(false && "fastcluster: nan error");
1405
+ #if HAVE_DIAGNOSTIC
1406
+ #pragma GCC diagnostic pop
1407
+ #endif
1408
+ if (d[i] < min) {
1409
+ min = d[i];
1410
+ idx2 = i;
1411
+ }
1412
+ }
1413
+ Z2.append(prev_node, idx2, min);
1414
+ }
1415
+ }
1416
+
1417
+ template <method_codes_vector method, typename t_dissimilarity>
1418
+ static void generic_linkage_vector(const t_index N,
1419
+ t_dissimilarity & dist,
1420
+ cluster_result & Z2) {
1421
+ /*
1422
+ N: integer, number of data points
1423
+ dist: function pointer to the metric
1424
+ Z2: output data structure
1425
+
1426
+ This algorithm is valid for the distance update methods
1427
+ "Ward", "centroid" and "median" only!
1428
+ */
1429
+ const t_index N_1 = N-1;
1430
+ t_index i, j; // loop variables
1431
+ t_index idx1, idx2; // row and column indices
1432
+
1433
+ auto_array_ptr<t_index> n_nghbr(N_1); // array of nearest neighbors
1434
+ auto_array_ptr<t_float> mindist(N_1); // distances to the nearest neighbors
1435
+ auto_array_ptr<t_index> row_repr(N); // row_repr[i]: node number that the
1436
+ // i-th row represents
1437
+ doubly_linked_list active_nodes(N);
1438
+ binary_min_heap nn_distances(&*mindist, N_1); // minimum heap structure for
1439
+ // the distance to the nearest neighbor of each point
1440
+ t_index node1, node2; // node numbers in the output
1441
+ t_float min; // minimum and row index for nearest-neighbor search
1442
+
1443
+ for (i=0; i<N; ++i)
1444
+ // Build a list of row ↔ node label assignments.
1445
+ // Initially i ↦ i
1446
+ row_repr[i] = i;
1447
+
1448
+ // Initialize the minimal distances:
1449
+ // Find the nearest neighbor of each point.
1450
+ // n_nghbr[i] = argmin_{j>i} D(i,j) for i in range(N-1)
1451
+ for (i=0; i<N_1; ++i) {
1452
+ min = std::numeric_limits<t_float>::infinity();
1453
+ t_index idx;
1454
+ for (idx=j=i+1; j<N; ++j) {
1455
+ t_float tmp;
1456
+ switch (method) {
1457
+ case METHOD_VECTOR_WARD:
1458
+ tmp = dist.ward_initial(i,j);
1459
+ break;
1460
+ default:
1461
+ tmp = dist.template sqeuclidean<true>(i,j);
1462
+ }
1463
+ if (tmp<min) {
1464
+ min = tmp;
1465
+ idx = j;
1466
+ }
1467
+ }
1468
+ switch (method) {
1469
+ case METHOD_VECTOR_WARD:
1470
+ mindist[i] = t_dissimilarity::ward_initial_conversion(min);
1471
+ break;
1472
+ default:
1473
+ mindist[i] = min;
1474
+ }
1475
+ n_nghbr[i] = idx;
1476
+ }
1477
+
1478
+ // Put the minimal distances into a heap structure to make the repeated
1479
+ // global minimum searches fast.
1480
+ nn_distances.heapify();
1481
+
1482
+ // Main loop: We have N-1 merging steps.
1483
+ for (i=0; i<N_1; ++i) {
1484
+ idx1 = nn_distances.argmin();
1485
+
1486
+ while ( active_nodes.is_inactive(n_nghbr[idx1]) ) {
1487
+ // Recompute the minimum mindist[idx1] and n_nghbr[idx1].
1488
+ n_nghbr[idx1] = j = active_nodes.succ[idx1]; // exists, maximally N-1
1489
+ switch (method) {
1490
+ case METHOD_VECTOR_WARD:
1491
+ min = dist.ward(idx1,j);
1492
+ for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
1493
+ t_float const tmp = dist.ward(idx1,j);
1494
+ if (tmp<min) {
1495
+ min = tmp;
1496
+ n_nghbr[idx1] = j;
1497
+ }
1498
+ }
1499
+ break;
1500
+ default:
1501
+ min = dist.template sqeuclidean<true>(idx1,j);
1502
+ for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
1503
+ t_float const tmp = dist.template sqeuclidean<true>(idx1,j);
1504
+ if (tmp<min) {
1505
+ min = tmp;
1506
+ n_nghbr[idx1] = j;
1507
+ }
1508
+ }
1509
+ }
1510
+ /* Update the heap with the new true minimum and search for the (possibly
1511
+ different) minimal entry. */
1512
+ nn_distances.update_geq(idx1, min);
1513
+ idx1 = nn_distances.argmin();
1514
+ }
1515
+
1516
+ nn_distances.heap_pop(); // Remove the current minimum from the heap.
1517
+ idx2 = n_nghbr[idx1];
1518
+
1519
+ // Write the newly found minimal pair of nodes to the output array.
1520
+ node1 = row_repr[idx1];
1521
+ node2 = row_repr[idx2];
1522
+
1523
+ Z2.append(node1, node2, mindist[idx1]);
1524
+
1525
+ switch (method) {
1526
+ case METHOD_VECTOR_WARD:
1527
+ case METHOD_VECTOR_CENTROID:
1528
+ dist.merge_inplace(idx1, idx2);
1529
+ break;
1530
+ case METHOD_VECTOR_MEDIAN:
1531
+ dist.merge_inplace_weighted(idx1, idx2);
1532
+ break;
1533
+ default:
1534
+ assert(false && "fastcluster: invalid method");
1535
+ }
1536
+
1537
+ // Index idx2 now represents the new (merged) node with label N+i.
1538
+ row_repr[idx2] = N+i;
1539
+ // Remove idx1 from the list of active indices (active_nodes).
1540
+ active_nodes.remove(idx1); // TBD later!!!
1541
+
1542
+ // Update the distance matrix
1543
+ switch (method) {
1544
+ case METHOD_VECTOR_WARD:
1545
+ /*
1546
+ Ward linkage.
1547
+
1548
+ Shorter and longer distances can occur, not smaller than min(d1,d2)
1549
+ but maybe bigger than max(d1,d2).
1550
+ */
1551
+ // Update the distance matrix in the range [start, idx1).
1552
+ for (j=active_nodes.start; j<idx1; j=active_nodes.succ[j]) {
1553
+ if (n_nghbr[j] == idx2) {
1554
+ n_nghbr[j] = idx1; // invalidate
1555
+ }
1556
+ }
1557
+ // Update the distance matrix in the range (idx1, idx2).
1558
+ for ( ; j<idx2; j=active_nodes.succ[j]) {
1559
+ t_float const tmp = dist.ward(j, idx2);
1560
+ if (tmp < mindist[j]) {
1561
+ nn_distances.update_leq(j, tmp);
1562
+ n_nghbr[j] = idx2;
1563
+ }
1564
+ else if (n_nghbr[j]==idx2) {
1565
+ n_nghbr[j] = idx1; // invalidate
1566
+ }
1567
+ }
1568
+ // Find the nearest neighbor for idx2.
1569
+ if (idx2<N_1) {
1570
+ n_nghbr[idx2] = j = active_nodes.succ[idx2]; // exists, maximally N-1
1571
+ min = dist.ward(idx2,j);
1572
+ for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
1573
+ t_float const tmp = dist.ward(idx2,j);
1574
+ if (tmp < min) {
1575
+ min = tmp;
1576
+ n_nghbr[idx2] = j;
1577
+ }
1578
+ }
1579
+ nn_distances.update(idx2, min);
1580
+ }
1581
+ break;
1582
+
1583
+ default:
1584
+ /*
1585
+ Centroid and median linkage.
1586
+
1587
+ Shorter and longer distances can occur, not bigger than max(d1,d2)
1588
+ but maybe smaller than min(d1,d2).
1589
+ */
1590
+ for (j=active_nodes.start; j<idx2; j=active_nodes.succ[j]) {
1591
+ t_float const tmp = dist.template sqeuclidean<true>(j, idx2);
1592
+ if (tmp < mindist[j]) {
1593
+ nn_distances.update_leq(j, tmp);
1594
+ n_nghbr[j] = idx2;
1595
+ }
1596
+ else if (n_nghbr[j] == idx2)
1597
+ n_nghbr[j] = idx1; // invalidate
1598
+ }
1599
+ // Find the nearest neighbor for idx2.
1600
+ if (idx2<N_1) {
1601
+ n_nghbr[idx2] = j = active_nodes.succ[idx2]; // exists, maximally N-1
1602
+ min = dist.template sqeuclidean<true>(idx2,j);
1603
+ for (j=active_nodes.succ[j]; j<N; j=active_nodes.succ[j]) {
1604
+ t_float const tmp = dist.template sqeuclidean<true>(idx2, j);
1605
+ if (tmp < min) {
1606
+ min = tmp;
1607
+ n_nghbr[idx2] = j;
1608
+ }
1609
+ }
1610
+ nn_distances.update(idx2, min);
1611
+ }
1612
+ }
1613
+ }
1614
+ }
1615
+
1616
+ template <method_codes_vector method, typename t_dissimilarity>
1617
+ static void generic_linkage_vector_alternative(const t_index N,
1618
+ t_dissimilarity & dist,
1619
+ cluster_result & Z2) {
1620
+ /*
1621
+ N: integer, number of data points
1622
+ dist: function pointer to the metric
1623
+ Z2: output data structure
1624
+
1625
+ This algorithm is valid for the distance update methods
1626
+ "Ward", "centroid" and "median" only!
1627
+ */
1628
+ const t_index N_1 = N-1;
1629
+ t_index i, j=0; // loop variables
1630
+ t_index idx1, idx2; // row and column indices
1631
+
1632
+ auto_array_ptr<t_index> n_nghbr(2*N-2); // array of nearest neighbors
1633
+ auto_array_ptr<t_float> mindist(2*N-2); // distances to the nearest neighbors
1634
+
1635
+ doubly_linked_list active_nodes(N+N_1);
1636
+ binary_min_heap nn_distances(&*mindist, N_1, 2*N-2, 1); // minimum heap
1637
+ // structure for the distance to the nearest neighbor of each point
1638
+
1639
+ t_float min; // minimum for nearest-neighbor searches
1640
+
1641
+ // Initialize the minimal distances:
1642
+ // Find the nearest neighbor of each point.
1643
+ // n_nghbr[i] = argmin_{j>i} D(i,j) for i in range(N-1)
1644
+ for (i=1; i<N; ++i) {
1645
+ min = std::numeric_limits<t_float>::infinity();
1646
+ t_index idx;
1647
+ for (idx=j=0; j<i; ++j) {
1648
+ t_float tmp;
1649
+ switch (method) {
1650
+ case METHOD_VECTOR_WARD:
1651
+ tmp = dist.ward_initial(i,j);
1652
+ break;
1653
+ default:
1654
+ tmp = dist.template sqeuclidean<true>(i,j);
1655
+ }
1656
+ if (tmp<min) {
1657
+ min = tmp;
1658
+ idx = j;
1659
+ }
1660
+ }
1661
+ switch (method) {
1662
+ case METHOD_VECTOR_WARD:
1663
+ mindist[i] = t_dissimilarity::ward_initial_conversion(min);
1664
+ break;
1665
+ default:
1666
+ mindist[i] = min;
1667
+ }
1668
+ n_nghbr[i] = idx;
1669
+ }
1670
+
1671
+ // Put the minimal distances into a heap structure to make the repeated
1672
+ // global minimum searches fast.
1673
+ nn_distances.heapify();
1674
+
1675
+ // Main loop: We have N-1 merging steps.
1676
+ for (i=N; i<N+N_1; ++i) {
1677
+ /*
1678
+ The bookkeeping is different from the "stored matrix approach" algorithm
1679
+ generic_linkage.
1680
+
1681
+ mindist[i] stores a lower bound on the minimum distance of the point i to
1682
+ all points of *lower* index:
1683
+
1684
+ mindist[i] ≥ min_{j<i} D(i,j)
1685
+
1686
+ Moreover, new nodes do not re-use one of the old indices, but they are
1687
+ given a new, unique index (SciPy convention: initial nodes are 0,…,N−1,
1688
+ new nodes are N,…,2N−2).
1689
+
1690
+ Invalid nearest neighbors are not recognized by the fact that the stored
1691
+ distance is smaller than the actual distance, but the list active_nodes
1692
+ maintains a flag whether a node is inactive. If n_nghbr[i] points to an
1693
+ active node, the entries nn_distances[i] and n_nghbr[i] are valid,
1694
+ otherwise they must be recomputed.
1695
+ */
1696
+ idx1 = nn_distances.argmin();
1697
+ while ( active_nodes.is_inactive(n_nghbr[idx1]) ) {
1698
+ // Recompute the minimum mindist[idx1] and n_nghbr[idx1].
1699
+ n_nghbr[idx1] = j = active_nodes.start;
1700
+ switch (method) {
1701
+ case METHOD_VECTOR_WARD:
1702
+ min = dist.ward_extended(idx1,j);
1703
+ for (j=active_nodes.succ[j]; j<idx1; j=active_nodes.succ[j]) {
1704
+ t_float tmp = dist.ward_extended(idx1,j);
1705
+ if (tmp<min) {
1706
+ min = tmp;
1707
+ n_nghbr[idx1] = j;
1708
+ }
1709
+ }
1710
+ break;
1711
+ default:
1712
+ min = dist.sqeuclidean_extended(idx1,j);
1713
+ for (j=active_nodes.succ[j]; j<idx1; j=active_nodes.succ[j]) {
1714
+ t_float const tmp = dist.sqeuclidean_extended(idx1,j);
1715
+ if (tmp<min) {
1716
+ min = tmp;
1717
+ n_nghbr[idx1] = j;
1718
+ }
1719
+ }
1720
+ }
1721
+ /* Update the heap with the new true minimum and search for the (possibly
1722
+ different) minimal entry. */
1723
+ nn_distances.update_geq(idx1, min);
1724
+ idx1 = nn_distances.argmin();
1725
+ }
1726
+
1727
+ idx2 = n_nghbr[idx1];
1728
+ active_nodes.remove(idx1);
1729
+ active_nodes.remove(idx2);
1730
+
1731
+ Z2.append(idx1, idx2, mindist[idx1]);
1732
+
1733
+ if (i<2*N_1) {
1734
+ switch (method) {
1735
+ case METHOD_VECTOR_WARD:
1736
+ case METHOD_VECTOR_CENTROID:
1737
+ dist.merge(idx1, idx2, i);
1738
+ break;
1739
+
1740
+ case METHOD_VECTOR_MEDIAN:
1741
+ dist.merge_weighted(idx1, idx2, i);
1742
+ break;
1743
+
1744
+ default:
1745
+ assert(false && "fastcluster: invalid method");
1746
+ }
1747
+
1748
+ n_nghbr[i] = active_nodes.start;
1749
+ if (method==METHOD_VECTOR_WARD) {
1750
+ /*
1751
+ Ward linkage.
1752
+
1753
+ Shorter and longer distances can occur, not smaller than min(d1,d2)
1754
+ but maybe bigger than max(d1,d2).
1755
+ */
1756
+ min = dist.ward_extended(active_nodes.start, i);
1757
+ for (j=active_nodes.succ[active_nodes.start]; j<i;
1758
+ j=active_nodes.succ[j]) {
1759
+ t_float tmp = dist.ward_extended(j, i);
1760
+ if (tmp < min) {
1761
+ min = tmp;
1762
+ n_nghbr[i] = j;
1763
+ }
1764
+ }
1765
+ }
1766
+ else {
1767
+ /*
1768
+ Centroid and median linkage.
1769
+
1770
+ Shorter and longer distances can occur, not bigger than max(d1,d2)
1771
+ but maybe smaller than min(d1,d2).
1772
+ */
1773
+ min = dist.sqeuclidean_extended(active_nodes.start, i);
1774
+ for (j=active_nodes.succ[active_nodes.start]; j<i;
1775
+ j=active_nodes.succ[j]) {
1776
+ t_float tmp = dist.sqeuclidean_extended(j, i);
1777
+ if (tmp < min) {
1778
+ min = tmp;
1779
+ n_nghbr[i] = j;
1780
+ }
1781
+ }
1782
+ }
1783
+ if (idx2<active_nodes.start) {
1784
+ nn_distances.remove(active_nodes.start);
1785
+ } else {
1786
+ nn_distances.remove(idx2);
1787
+ }
1788
+ nn_distances.replace(idx1, i, min);
1789
+ }
1790
+ }
1791
+ }
1792
+
1793
+ #if HAVE_VISIBILITY
1794
+ #pragma GCC visibility pop
1795
+ #endif