complearn 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. data/AUTHORS +13 -0
  2. data/COPYING +340 -0
  3. data/ChangeLog +0 -0
  4. data/INSTALL +231 -0
  5. data/Makefile +352 -0
  6. data/Makefile.am +76 -0
  7. data/Makefile.in +352 -0
  8. data/NEWS +7 -0
  9. data/README +0 -0
  10. data/aclocal.m4 +104 -0
  11. data/bin/Makefile +209 -0
  12. data/bin/Makefile.am +8 -0
  13. data/bin/Makefile.in +209 -0
  14. data/bin/labeltree +68 -0
  15. data/bin/labeltree.in +68 -0
  16. data/bin/makesvm +70 -0
  17. data/bin/makesvm.in +70 -0
  18. data/bin/maketree +98 -0
  19. data/bin/maketree.in +98 -0
  20. data/bin/ncd +43 -0
  21. data/bin/ncd.in +43 -0
  22. data/bin/ncdmatrix +54 -0
  23. data/bin/ncdmatrix.in +54 -0
  24. data/bin/ncdvector +50 -0
  25. data/bin/ncdvector.in +50 -0
  26. data/complearn-0.6.2.gem +0 -0
  27. data/complearn.gemspec +57 -0
  28. data/config.log +597 -0
  29. data/config.status +1082 -0
  30. data/configure +4922 -0
  31. data/configure.ac +91 -0
  32. data/confstat5FpLBf/config.h +65 -0
  33. data/confstat5FpLBf/subs-1.sed +50 -0
  34. data/confstat5FpLBf/subs-2.sed +13 -0
  35. data/confstat5FpLBf/subs.frag +0 -0
  36. data/confstat5FpLBf/subs.sed +59 -0
  37. data/confstat5FpLBf/undefs.sed +24 -0
  38. data/doc/FAQ.txt +67 -0
  39. data/doc/Makefile +286 -0
  40. data/doc/Makefile.am +11 -0
  41. data/doc/Makefile.in +286 -0
  42. data/doc/devguide.txt +15 -0
  43. data/doc/example.complearnrc +14 -0
  44. data/doc/examples.txt +35 -0
  45. data/doc/man/Makefile +255 -0
  46. data/doc/man/Makefile.am +11 -0
  47. data/doc/man/Makefile.in +255 -0
  48. data/doc/man/complearn.5 +91 -0
  49. data/doc/man/labeltree.1 +35 -0
  50. data/doc/man/makesvm.1 +60 -0
  51. data/doc/man/maketree.1 +58 -0
  52. data/doc/man/ncd.1 +51 -0
  53. data/doc/man/ncdmatrix.1 +40 -0
  54. data/doc/man/ncdvector.1 +42 -0
  55. data/doc/readme.txt +101 -0
  56. data/doc/userguide.txt +46 -0
  57. data/examples/genes/blueWhale.txt +1 -0
  58. data/examples/genes/cat.txt +1 -0
  59. data/examples/genes/chimpanzee.txt +1 -0
  60. data/examples/genes/finWhale.txt +1 -0
  61. data/examples/genes/graySeal.txt +1 -0
  62. data/examples/genes/harborSeal.txt +1 -0
  63. data/examples/genes/horse.txt +1 -0
  64. data/examples/genes/human.txt +1 -0
  65. data/examples/genes/mouse.txt +1 -0
  66. data/examples/genes/rat.txt +1 -0
  67. data/ext/Makefile +167 -0
  68. data/ext/Quartet.c +399 -0
  69. data/ext/Quartet.h +62 -0
  70. data/ext/TreeScore.c +244 -0
  71. data/ext/TreeScore.h +3 -0
  72. data/ext/config.h +65 -0
  73. data/ext/config.h.in +64 -0
  74. data/ext/extconf.rb +3 -0
  75. data/ext/lib/CompLearnLib/CLConfig.rb +241 -0
  76. data/ext/lib/CompLearnLib/CompressionObject.rb +59 -0
  77. data/ext/lib/CompLearnLib/CompressionTask.rb +99 -0
  78. data/ext/lib/CompLearnLib/DistMatrix.rb +18 -0
  79. data/ext/lib/CompLearnLib/FoundComp.rb +10 -0
  80. data/ext/lib/CompLearnLib/FoundComp.rb.in +10 -0
  81. data/ext/lib/CompLearnLib/Ncd.rb +248 -0
  82. data/ext/lib/CompLearnLib/RunEnv.rb +150 -0
  83. data/ext/lib/CompLearnLib/Task.rb +39 -0
  84. data/ext/lib/CompLearnLib/TaskMaster.rb +13 -0
  85. data/ext/lib/CompLearnLib/TaskMasterMPI.rb +112 -0
  86. data/ext/lib/CompLearnLib/TaskMasterSingle.rb +39 -0
  87. data/ext/lib/CompLearnLib/Tree.rb +300 -0
  88. data/install-sh +294 -0
  89. data/missing +336 -0
  90. data/mkinstalldirs +111 -0
  91. data/o +24 -0
  92. data/scripts/CompLearn.iss +89 -0
  93. data/scripts/CompLearn.iss.in +89 -0
  94. data/scripts/debian/changelog +6 -0
  95. data/scripts/debian/control +14 -0
  96. data/scripts/makeSetup.sh +23 -0
  97. data/scripts/makeSetup.sh.in +23 -0
  98. data/scripts/makedeb.zsh +46 -0
  99. data/scripts/makedeb.zsh.in +46 -0
  100. data/tests/alltests.rb +2 -0
  101. data/tests/bz2test.rb +516 -0
  102. data/tests/sshagent-test.rb +48 -0
  103. data/tests/tests.rb +275 -0
  104. metadata +164 -0
@@ -0,0 +1,399 @@
1
+ #include <stdio.h>
2
+ #include <assert.h>
3
+ #include <ctype.h>
4
+ #include <stdlib.h>
5
+ #include <unistd.h>
6
+ #include <sys/types.h>
7
+ #include <malloc.h>
8
+ #include <sys/time.h>
9
+
10
+ #include "ruby.h"
11
+ #include "Quartet.h"
12
+
13
+ void printTree(const struct NodeTree *nt)
14
+ {
15
+ int i, j;
16
+ printf("Tree size: %d\n", nt->size);
17
+ for (i = 0; i < nt->size; ++i) {
18
+ const struct NodeList *cur = &nt->nl[i];
19
+ for (j = 0; j < cur->size; ++j) {
20
+ printf("%d->%d, ", i, cur->n[j]);
21
+ }
22
+ }
23
+ printf("\n");
24
+ }
25
+
26
+ struct NodeTree *newNodeTree(int size)
27
+ {
28
+ // m2.
29
+ struct NodeTree *result = (struct NodeTree *) calloc(sizeof(struct NodeTree), 1);
30
+ result->size = size;
31
+ // m12.
32
+ result->nl = (struct NodeList *) calloc(sizeof(struct NodeList), size);
33
+ return result;
34
+ }
35
+
36
/*
 * Release a NodeList array obtained from the allocator (see newNodeTree).
 *
 * The pointer is handed straight to free(); nothing is written through it
 * first, because it may be NULL or refer to a zero-element allocation when
 * the owning tree was created with size 0.
 */
void freeNodeList(struct NodeList *nl)
{
  // f12.
  free(nl);
}
42
+
43
+ void freeNodeTree(struct NodeTree *nt)
44
+ {
45
+ freeNodeList(nt->nl);
46
+ nt->nl = NULL;
47
+ nt->size = 0;
48
+ // f2.
49
+ free(nt);
50
+ }
51
+
52
+ /* Allocate a new DistMatrix square of side length size, or size*size places */
53
+ struct DistMatrix *newDistMatrix(int size)
54
+ {
55
+ struct DistMatrix *dm;
56
+ int i;
57
+
58
+ // m3.
59
+ dm = (struct DistMatrix *) calloc(sizeof(struct DistMatrix), 1);
60
+ dm->size = size;
61
+
62
+ // m4.
63
+ dm->vals = (weight_t **) calloc(sizeof(weight_t *), size);
64
+
65
+ // m5.
66
+ for (i = 0; i < size; ++i)
67
+ dm->vals[i] = (weight_t *) calloc(sizeof(weight_t), size);
68
+
69
+ return dm;
70
+ }
71
+
72
+ /* Free a DistMatrix */
73
+ void freeDistMatrix(struct DistMatrix *dm)
74
+ {
75
+ int i;
76
+ // f5.
77
+ for (i = 0; i < dm->size; ++i)
78
+ free(dm->vals[i]);
79
+ // f4.
80
+ free(dm->vals);
81
+ dm->vals = NULL;
82
+ // f3.
83
+ free(dm);
84
+ }
85
+
86
+ /*
87
+ * Calculate the cost of quartet q relative to a distance matrix dm
88
+ * using the formula Cost = LeftSubtreeDistance + RightSubtreeDistance
89
+ */
90
+ weight_t calculateCost(struct Quartet *q, const struct DistMatrix *dm)
91
+ {
92
+ int i;
93
+ for (i = 0; i < 4; ++i)
94
+ assert(dm->size > q->q[i]);
95
+ return dm->vals[q->q[0]][q->q[1]] + dm->vals[q->q[2]][q->q[3]];
96
+ }
97
+
98
+ /*
99
+ * Create a list of all 3 possible quartet arrangements of four nodes.
100
+ * Copy these into the area pointed to by result.
101
+ */
102
+ void makeAllQuartetsFromLabels(int i,int j,int k,int l,
103
+ const struct DistMatrix *dm, struct Quartet *result)
104
+ {
105
+ struct Quartet cand[3] = {
106
+ { 0, { i, j, k, l } },
107
+ { 0, { i, k, j, l } },
108
+ { 0, { i, l, j, k } }
109
+ };
110
+ int c;
111
+ for (c = 0; c < 3; ++c) {
112
+ cand[c].weight = -calculateCost(cand+c, dm);
113
+ result[c] = cand[c];
114
+ }
115
+ }
116
+
117
+ /*
118
+ * Make just the best quartet possible for this unordered set of four labels.
119
+ * Copy this into the result area.
120
+ */
121
+ void makeSingleQuartetFromLabels(int i,int j,int k,int l,
122
+ const struct DistMatrix *dm, struct Quartet *result)
123
+ {
124
+ struct Quartet cand[3] = {
125
+ { 0, { i, j, k, l } },
126
+ { 0, { i, k, j, l } },
127
+ { 0, { i, l, j, k } }
128
+ };
129
+
130
+ int c;
131
+ int flag;
132
+
133
+ for (c = 0; c < 3; ++c)
134
+ cand[c].weight = calculateCost(cand+c, dm);
135
+
136
+ /* Bubble-sort the three weights */
137
+ do {
138
+ flag = 0;
139
+ for (c = 0; c < 2; ++c)
140
+ if (cand[c].weight > cand[c+1].weight) {
141
+ struct Quartet tmp = cand[c];
142
+ cand[c] = cand[c+1];
143
+ cand[c+1] = tmp;
144
+ flag = 1;
145
+ }
146
+ } while (flag == 1);
147
+ // Make weight equal cost difference between top two candidates
148
+ cand[0].weight = cand[1].weight - cand[0].weight;
149
+ *result = cand[0];
150
+ }
151
+
152
/*
 * Return the number of ways to choose k items from a set of n without
 * ordering, i.e. the binomial coefficient C(n, k).
 *
 * Returns 0 when k > n.  The running product is kept in an unsigned
 * 64-bit accumulator and divided at every step, so intermediate values
 * stay small instead of overflowing a signed int; the final value is
 * converted to unsigned int on return.
 */
unsigned int choose(unsigned int n, unsigned int k)
{
  unsigned long long acc = 1;
  unsigned int i;

  if (k > n)
    return 0;

  /* C(n, k) == C(n, n - k): iterate over the smaller of the two. */
  if (k > n - k)
    k = n - k;

  /* After step i, acc == C(n - k + i, i), so each division is exact. */
  for (i = 1; i <= k; i++)
    acc = acc * (n - k + i) / i;

  return (unsigned int) acc;
}
164
+
165
+ /*
166
+ * Make complete (all 3*n) quartet list for this weight matrix.
167
+ */
168
+ struct QuartetList *makeFullQuartetList(const struct DistMatrix *dm, weight_t *worst, weight_t *best)
169
+ {
170
+ int i, j, k, l;
171
+ weight_t wmin, wmax;
172
+ int m;
173
+ int c = dm->size;
174
+ unsigned int curindex = 0;
175
+ // m6.
176
+ struct QuartetList *result = (struct QuartetList *) calloc(sizeof(struct QuartetList), 1);
177
+ result->num = 3 * choose(dm->size, 4);
178
+ // m7.
179
+ result->q = (struct Quartet *) calloc(sizeof(struct Quartet), result->num);
180
+ *worst = *best = 0.0;
181
+ for (i = 0; i < c; ++i)
182
+ for (j = i+1; j < c; ++j)
183
+ for (k = j+1; k < c; ++k)
184
+ for (l = k+1; l < c; ++l) {
185
+ makeAllQuartetsFromLabels(i,j,k,l,dm,result->q+curindex);
186
+ wmin = wmax = result->q[curindex].weight;
187
+ for (m = 1; m < 3; ++m) {
188
+ if (wmin > result->q[curindex+m].weight)
189
+ wmin = result->q[curindex+m].weight;
190
+ if (wmax < result->q[curindex+m].weight)
191
+ wmax = result->q[curindex+m].weight;
192
+ }
193
+ curindex += 3;
194
+ *worst += wmin;
195
+ *best += wmax;
196
+ }
197
+ return result;
198
+ }
199
+
200
+ /*
201
+ * Make a list of just the best quartets for this distance matrix
202
+ * This is 1/3 of the size of the full list
203
+ */
204
+ struct QuartetList *makeBestQuartetList(const struct DistMatrix *dm, weight_t *worst, weight_t *best)
205
+ {
206
+ int i, j, k, l, c;
207
+ weight_t total = 0;
208
+ unsigned int curindex = 0;
209
+ // m8.
210
+ struct QuartetList *result = (struct QuartetList *) calloc(sizeof(struct QuartetList), 1);
211
+ result->num = choose(dm->size, 4);
212
+ // m9.
213
+ result->q = (struct Quartet *) calloc(sizeof(struct Quartet), result->num);
214
+ c = dm->size;
215
+ for (i = 0; i < c; ++i)
216
+ for (j = i+1; j < c; ++j)
217
+ for (k = j+1; k < c; ++k)
218
+ for (l = k+1; l < c; ++l) {
219
+ makeSingleQuartetFromLabels(i,j,k,l,dm,result->q+curindex);
220
+ total += result->q[curindex].weight;
221
+ curindex += 1;
222
+ }
223
+ *worst = 0.0;
224
+ *best = total;
225
+ return result;
226
+ }
227
+
228
+ static qbase_t pop_front(struct NodeList *nl)
229
+ {
230
+ qbase_t result;
231
+ assert(nl->size > 0);
232
+ result = nl->n[0];
233
+ nl->size -= 1;
234
+ memmove(nl->n, nl->n+1, sizeof(nl->n[0])*nl->size);
235
+ return result;
236
+ }
237
+
238
+ void push_back(struct NodeList *nl, qbase_t which)
239
+ {
240
+ nl->n[nl->size] = which;
241
+ nl->size += 1;
242
+ }
243
+
244
+ /*
245
+ * Calculate the shortest path map for a given root node.
246
+ * For a tree of n nodes, the map from r will look like this:
247
+ * result[r] == r (only the root node points to itself)
248
+ * for any other x != r, result[x] is the unique neighboring node whose
249
+ * distance to r is exactly 1 less than x's.
250
+ *
251
+ */
252
+ void shortestPathTree(qbase_t root, const struct NodeTree *tree, struct NodeList *result)
253
+ {
254
+ struct NodeList pending;
255
+ struct NodeList visitted;
256
+ pending.size = visitted.size = 0;
257
+ push_back(&pending, root);
258
+ memset(visitted.n, 0, sizeof(visitted.n[0]) * tree->size);
259
+ visitted.n[root] = 1;
260
+ result->n[root] = root;
261
+ while (pending.size > 0) {
262
+ qbase_t from = pop_front(&pending);
263
+ int i;
264
+ for (i = 0; i < tree->nl[from].size; ++i) {
265
+ qbase_t cur = tree->nl[from].n[i];
266
+ if (!visitted.n[cur]) {
267
+ visitted.n[cur] = 1;
268
+ push_back(&pending, cur);
269
+ result->n[cur] = from;
270
+ }
271
+ }
272
+ }
273
+ }
274
+
275
+ /*
276
+ * Calculate the shortest path tree for all nodes in this tree and
277
+ * return this result as a list of NodeList as in shortestPathTree.
278
+ * That is, result[0] will contain the shortest path map rooted at node 0,
279
+ * result[1] will be rooted at node 1, and so on.
280
+ */
281
+ struct NodeTree *allShortestPathTrees(const struct NodeTree *tree)
282
+ {
283
+ struct NodeTree *result = newNodeTree(tree->size);
284
+ qbase_t i;
285
+ for (i = 0; i < tree->size; ++i) {
286
+ shortestPathTree(i, tree, &result->nl[i]);
287
+ }
288
+ return result;
289
+ }
290
+
291
+ /*
292
+ * Finds the (shortest and only) path starting at a and going to b.
293
+ * Returns this as a list of the following form:
294
+ * a .. n1 .. n2 .. b
295
+ * or if a == b, then
296
+ * a
297
+ */
298
+ void findPath(struct NodeList *result, const struct FastTree *ft, qbase_t a, qbase_t b)
299
+ {
300
+ const struct NodeList *nl = &ft->spm->nl[b];
301
+ result->size = 0;
302
+ while (a != b) {
303
+ push_back(result, a);
304
+ a = nl->n[a];
305
+ }
306
+ push_back(result, a);
307
+ }
308
+
309
+ /*
310
+ * Returns 1 iff the two paths a and b intersect.
311
+ */
312
+ int pathsIntersect(const struct NodeList *a, const struct NodeList *b)
313
+ {
314
+ int i, j;
315
+ for (i = 0; i < a->size; i++)
316
+ for (j = 0; j < b->size; j++)
317
+ if (a->n[i] == b->n[j])
318
+ return 1;
319
+ return 0;
320
+ }
321
+
322
+ int quartetCompatible(const struct Quartet* q, const struct FastTree *ft)
323
+ {
324
+ struct NodeList pa, pb;
325
+ findPath(&pa, ft, q->q[0], q->q[1]);
326
+ findPath(&pb, ft, q->q[2], q->q[3]);
327
+ return !pathsIntersect(&pa,&pb);
328
+ }
329
+
330
+ weight_t calculateWeightedScore(const struct FastTree *ft, const struct QuartetList *ql)
331
+ {
332
+ weight_t acc=0;
333
+ unsigned int i;
334
+ for (i = 0; i < ql->num; ++i) {
335
+ if (quartetCompatible(&ql->q[i], ft))
336
+ acc += ql->q[i].weight;
337
+ }
338
+ return acc;
339
+ }
340
+
341
+ void removeFromList(struct NodeList *nl, qbase_t me)
342
+ {
343
+ int i;
344
+ int oldsize = nl->size;
345
+ nl->size = 0;
346
+ for (i = 0; i < oldsize; ++i) {
347
+ qbase_t cur = nl->n[i];
348
+ if (cur != me)
349
+ push_back(nl, cur);
350
+ }
351
+ }
352
+
353
+ void disconnectNodes(struct FastTree *ft, qbase_t a, qbase_t b)
354
+ {
355
+ removeFromList(&ft->tree->nl[a], b);
356
+ removeFromList(&ft->tree->nl[b], a);
357
+ }
358
+
359
+ void connectNodes(struct FastTree *ft, qbase_t a, qbase_t b)
360
+ {
361
+ push_back(&ft->tree->nl[a], b);
362
+ push_back(&ft->tree->nl[b], a);
363
+ }
364
+
365
+ void freeQuartetList(struct QuartetList *ql)
366
+ {
367
+ // f7. f6. or f9. f8.
368
+ free(ql->q);
369
+ ql->q = NULL;
370
+ free(ql);
371
+ }
372
+
373
+ struct FastTree *newFastTree(int size)
374
+ {
375
+ // m10.
376
+ struct FastTree *result = (struct FastTree *) calloc(sizeof(struct FastTree), 1);
377
+ result->spec = size;
378
+ result->score = 0;
379
+ result->tree = newNodeTree(size);
380
+ result->spm = NULL;
381
+ return result;
382
+ }
383
+
384
+ void freeFastTree(struct FastTree *ft)
385
+ {
386
+ if (ft->tree) {
387
+ freeNodeTree(ft->tree);
388
+ ft->tree = NULL;
389
+ }
390
+ if (ft->spm) {
391
+ freeNodeTree(ft->spm);
392
+ ft->spm = NULL;
393
+ }
394
+ ft->spec = 0;
395
+ ft->score = 0;
396
+ // f10.
397
+ free(ft);
398
+ }
399
+
@@ -0,0 +1,62 @@
1
/*
 * Quartet.h - data structures and entry points for quartet-based tree
 * scoring.
 */
#ifndef MQUARTET_H   /* was __MQUARTET_H: double-underscore names are reserved */
#define MQUARTET_H

/* Capacity of the fixed-size array inside every NodeList. */
#define MAXNODES 1024

#include <sys/types.h>

/*
 * Node label / index type.
 * NOTE(review): an unsigned char cannot represent indices above 255 even
 * though MAXNODES is 1024 -- confirm the intended node limit.
 */
typedef unsigned char qbase_t;
typedef double weight_t;
typedef double dist_t;

/* Four labels read as the pairing (q[0], q[1]) | (q[2], q[3]), plus a weight. */
struct Quartet {
  weight_t weight;
  qbase_t q[4];
};

/* Fixed-capacity list of node labels; only the first `size` entries are in use. */
struct NodeList {
  size_t size;
  qbase_t n[MAXNODES];
};

/* Array of `size` NodeLists, one per node. */
struct NodeTree {
  size_t size;
  struct NodeList *nl;
};

/* Square matrix: `size` rows of `size` doubles each. */
struct DistMatrix {
  size_t size;
  double **vals;
};

/* Heap-allocated array of `num` quartets. */
struct QuartetList {
  size_t num;
  struct Quartet *q;
};

/*
 * A tree and its score.  `tree` holds the adjacency lists; `spm` holds the
 * per-root shortest path maps and may be NULL.
 */
struct FastTree {
  int spec;
  weight_t score;
  struct NodeTree *tree, *spm;
};


struct FastTree *newFastTree(int size);
void freeFastTree(struct FastTree *ft);
/* NOTE(review): no definition of newNodeList is visible alongside this header -- confirm. */
struct NodeList *newNodeList(int size);
struct NodeTree *newNodeTree(int size);
void freeNodeList(struct NodeList *nl);
void freeNodeTree(struct NodeTree *nt);
struct DistMatrix *newDistMatrix(int size);
void freeDistMatrix(struct DistMatrix *dm);
struct NodeTree *allShortestPathTrees(const struct NodeTree *tree);
weight_t calculateWeightedScore(const struct FastTree *ft, const struct QuartetList *ql);
/* NOTE(review): no definition of makeMutatedCopy is visible alongside this header -- confirm. */
struct FastTree *makeMutatedCopy(const struct FastTree *ft);
void findPath(struct NodeList *result, const struct FastTree *ft, qbase_t a, qbase_t b);
struct QuartetList *makeFullQuartetList(const struct DistMatrix *dm, weight_t *worst, weight_t *best);
struct QuartetList *makeBestQuartetList(const struct DistMatrix *dm, weight_t *worst, weight_t *best);
void freeQuartetList(struct QuartetList *ql);
void push_back(struct NodeList *nl, qbase_t which);

#endif
62
+