complearn 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. data/AUTHORS +13 -0
  2. data/COPYING +340 -0
  3. data/ChangeLog +0 -0
  4. data/INSTALL +231 -0
  5. data/Makefile +352 -0
  6. data/Makefile.am +76 -0
  7. data/Makefile.in +352 -0
  8. data/NEWS +7 -0
  9. data/README +0 -0
  10. data/aclocal.m4 +104 -0
  11. data/bin/Makefile +209 -0
  12. data/bin/Makefile.am +8 -0
  13. data/bin/Makefile.in +209 -0
  14. data/bin/labeltree +68 -0
  15. data/bin/labeltree.in +68 -0
  16. data/bin/makesvm +70 -0
  17. data/bin/makesvm.in +70 -0
  18. data/bin/maketree +98 -0
  19. data/bin/maketree.in +98 -0
  20. data/bin/ncd +43 -0
  21. data/bin/ncd.in +43 -0
  22. data/bin/ncdmatrix +54 -0
  23. data/bin/ncdmatrix.in +54 -0
  24. data/bin/ncdvector +50 -0
  25. data/bin/ncdvector.in +50 -0
  26. data/complearn-0.6.2.gem +0 -0
  27. data/complearn.gemspec +57 -0
  28. data/config.log +597 -0
  29. data/config.status +1082 -0
  30. data/configure +4922 -0
  31. data/configure.ac +91 -0
  32. data/confstat5FpLBf/config.h +65 -0
  33. data/confstat5FpLBf/subs-1.sed +50 -0
  34. data/confstat5FpLBf/subs-2.sed +13 -0
  35. data/confstat5FpLBf/subs.frag +0 -0
  36. data/confstat5FpLBf/subs.sed +59 -0
  37. data/confstat5FpLBf/undefs.sed +24 -0
  38. data/doc/FAQ.txt +67 -0
  39. data/doc/Makefile +286 -0
  40. data/doc/Makefile.am +11 -0
  41. data/doc/Makefile.in +286 -0
  42. data/doc/devguide.txt +15 -0
  43. data/doc/example.complearnrc +14 -0
  44. data/doc/examples.txt +35 -0
  45. data/doc/man/Makefile +255 -0
  46. data/doc/man/Makefile.am +11 -0
  47. data/doc/man/Makefile.in +255 -0
  48. data/doc/man/complearn.5 +91 -0
  49. data/doc/man/labeltree.1 +35 -0
  50. data/doc/man/makesvm.1 +60 -0
  51. data/doc/man/maketree.1 +58 -0
  52. data/doc/man/ncd.1 +51 -0
  53. data/doc/man/ncdmatrix.1 +40 -0
  54. data/doc/man/ncdvector.1 +42 -0
  55. data/doc/readme.txt +101 -0
  56. data/doc/userguide.txt +46 -0
  57. data/examples/genes/blueWhale.txt +1 -0
  58. data/examples/genes/cat.txt +1 -0
  59. data/examples/genes/chimpanzee.txt +1 -0
  60. data/examples/genes/finWhale.txt +1 -0
  61. data/examples/genes/graySeal.txt +1 -0
  62. data/examples/genes/harborSeal.txt +1 -0
  63. data/examples/genes/horse.txt +1 -0
  64. data/examples/genes/human.txt +1 -0
  65. data/examples/genes/mouse.txt +1 -0
  66. data/examples/genes/rat.txt +1 -0
  67. data/ext/Makefile +167 -0
  68. data/ext/Quartet.c +399 -0
  69. data/ext/Quartet.h +62 -0
  70. data/ext/TreeScore.c +244 -0
  71. data/ext/TreeScore.h +3 -0
  72. data/ext/config.h +65 -0
  73. data/ext/config.h.in +64 -0
  74. data/ext/extconf.rb +3 -0
  75. data/ext/lib/CompLearnLib/CLConfig.rb +241 -0
  76. data/ext/lib/CompLearnLib/CompressionObject.rb +59 -0
  77. data/ext/lib/CompLearnLib/CompressionTask.rb +99 -0
  78. data/ext/lib/CompLearnLib/DistMatrix.rb +18 -0
  79. data/ext/lib/CompLearnLib/FoundComp.rb +10 -0
  80. data/ext/lib/CompLearnLib/FoundComp.rb.in +10 -0
  81. data/ext/lib/CompLearnLib/Ncd.rb +248 -0
  82. data/ext/lib/CompLearnLib/RunEnv.rb +150 -0
  83. data/ext/lib/CompLearnLib/Task.rb +39 -0
  84. data/ext/lib/CompLearnLib/TaskMaster.rb +13 -0
  85. data/ext/lib/CompLearnLib/TaskMasterMPI.rb +112 -0
  86. data/ext/lib/CompLearnLib/TaskMasterSingle.rb +39 -0
  87. data/ext/lib/CompLearnLib/Tree.rb +300 -0
  88. data/install-sh +294 -0
  89. data/missing +336 -0
  90. data/mkinstalldirs +111 -0
  91. data/o +24 -0
  92. data/scripts/CompLearn.iss +89 -0
  93. data/scripts/CompLearn.iss.in +89 -0
  94. data/scripts/debian/changelog +6 -0
  95. data/scripts/debian/control +14 -0
  96. data/scripts/makeSetup.sh +23 -0
  97. data/scripts/makeSetup.sh.in +23 -0
  98. data/scripts/makedeb.zsh +46 -0
  99. data/scripts/makedeb.zsh.in +46 -0
  100. data/tests/alltests.rb +2 -0
  101. data/tests/bz2test.rb +516 -0
  102. data/tests/sshagent-test.rb +48 -0
  103. data/tests/tests.rb +275 -0
  104. metadata +164 -0
@@ -0,0 +1,399 @@
1
+ #include <stdio.h>
2
+ #include <assert.h>
3
+ #include <ctype.h>
4
+ #include <stdlib.h>
5
+ #include <unistd.h>
6
+ #include <sys/types.h>
7
+ #include <malloc.h>
8
+ #include <sys/time.h>
9
+
10
+ #include "ruby.h"
11
+ #include "Quartet.h"
12
+
13
+ void printTree(const struct NodeTree *nt)
14
+ {
15
+ int i, j;
16
+ printf("Tree size: %d\n", nt->size);
17
+ for (i = 0; i < nt->size; ++i) {
18
+ const struct NodeList *cur = &nt->nl[i];
19
+ for (j = 0; j < cur->size; ++j) {
20
+ printf("%d->%d, ", i, cur->n[j]);
21
+ }
22
+ }
23
+ printf("\n");
24
+ }
25
+
26
+ struct NodeTree *newNodeTree(int size)
27
+ {
28
+ // m2.
29
+ struct NodeTree *result = (struct NodeTree *) calloc(sizeof(struct NodeTree), 1);
30
+ result->size = size;
31
+ // m12.
32
+ result->nl = (struct NodeList *) calloc(sizeof(struct NodeList), size);
33
+ return result;
34
+ }
35
+
36
+ void freeNodeList(struct NodeList *nl)
37
+ {
38
+ nl->size = 0;
39
+ // f12.
40
+ free(nl);
41
+ }
42
+
43
+ void freeNodeTree(struct NodeTree *nt)
44
+ {
45
+ freeNodeList(nt->nl);
46
+ nt->nl = NULL;
47
+ nt->size = 0;
48
+ // f2.
49
+ free(nt);
50
+ }
51
+
52
+ /* Allocate a new DistMatrix square of side length size, or size*size places */
53
+ struct DistMatrix *newDistMatrix(int size)
54
+ {
55
+ struct DistMatrix *dm;
56
+ int i;
57
+
58
+ // m3.
59
+ dm = (struct DistMatrix *) calloc(sizeof(struct DistMatrix), 1);
60
+ dm->size = size;
61
+
62
+ // m4.
63
+ dm->vals = (weight_t **) calloc(sizeof(weight_t *), size);
64
+
65
+ // m5.
66
+ for (i = 0; i < size; ++i)
67
+ dm->vals[i] = (weight_t *) calloc(sizeof(weight_t), size);
68
+
69
+ return dm;
70
+ }
71
+
72
+ /* Free a DistMatrix */
73
+ void freeDistMatrix(struct DistMatrix *dm)
74
+ {
75
+ int i;
76
+ // f5.
77
+ for (i = 0; i < dm->size; ++i)
78
+ free(dm->vals[i]);
79
+ // f4.
80
+ free(dm->vals);
81
+ dm->vals = NULL;
82
+ // f3.
83
+ free(dm);
84
+ }
85
+
86
+ /*
87
+ * Calculate the cost of quartet q relative to a distance matrix dm
88
+ * using the formula Cost = LeftSubtreeDistance + RightSubtreeDistance
89
+ */
90
+ weight_t calculateCost(struct Quartet *q, const struct DistMatrix *dm)
91
+ {
92
+ int i;
93
+ for (i = 0; i < 4; ++i)
94
+ assert(dm->size > q->q[i]);
95
+ return dm->vals[q->q[0]][q->q[1]] + dm->vals[q->q[2]][q->q[3]];
96
+ }
97
+
98
+ /*
99
+ * Create a list of all 3 possible quartet arrangements of four nodes.
100
+ * Copy these into the area pointed to by result.
101
+ */
102
+ void makeAllQuartetsFromLabels(int i,int j,int k,int l,
103
+ const struct DistMatrix *dm, struct Quartet *result)
104
+ {
105
+ struct Quartet cand[3] = {
106
+ { 0, { i, j, k, l } },
107
+ { 0, { i, k, j, l } },
108
+ { 0, { i, l, j, k } }
109
+ };
110
+ int c;
111
+ for (c = 0; c < 3; ++c) {
112
+ cand[c].weight = -calculateCost(cand+c, dm);
113
+ result[c] = cand[c];
114
+ }
115
+ }
116
+
117
+ /*
118
+ * Make just the best quartet possible for this unordered set of four labels.
119
+ * Copy this into the result area.
120
+ */
121
+ void makeSingleQuartetFromLabels(int i,int j,int k,int l,
122
+ const struct DistMatrix *dm, struct Quartet *result)
123
+ {
124
+ struct Quartet cand[3] = {
125
+ { 0, { i, j, k, l } },
126
+ { 0, { i, k, j, l } },
127
+ { 0, { i, l, j, k } }
128
+ };
129
+
130
+ int c;
131
+ int flag;
132
+
133
+ for (c = 0; c < 3; ++c)
134
+ cand[c].weight = calculateCost(cand+c, dm);
135
+
136
+ /* Bubble-sort the three weights */
137
+ do {
138
+ flag = 0;
139
+ for (c = 0; c < 2; ++c)
140
+ if (cand[c].weight > cand[c+1].weight) {
141
+ struct Quartet tmp = cand[c];
142
+ cand[c] = cand[c+1];
143
+ cand[c+1] = tmp;
144
+ flag = 1;
145
+ }
146
+ } while (flag == 1);
147
+ // Make weight equal cost difference between top two candidates
148
+ cand[0].weight = cand[1].weight - cand[0].weight;
149
+ *result = cand[0];
150
+ }
151
+
152
/*
 * Return the number of ways to choose k items from a set of n without
 * ordering, i.e. the binomial coefficient C(n, k).
 *
 * Returns 0 when k > n.  The product is built incrementally so that each
 * intermediate value is itself a binomial coefficient (every division is
 * exact), and it is accumulated in an unsigned 64-bit-or-wider type so
 * that results representable in unsigned int are computed without
 * overflow.  A result too large for unsigned int is truncated on return.
 */
unsigned int choose(unsigned int n, unsigned int k)
{
  unsigned long long acc = 1;
  unsigned int i;

  if (k > n)
    return 0;
  /* C(n, k) == C(n, n-k); use the smaller k to keep the loop short */
  if (k > n - k)
    k = n - k;
  /* after step i, acc == C(n - k + i, i) */
  for (i = 1; i <= k; i++)
    acc = acc * (n - k + i) / i;
  return (unsigned int) acc;
}
164
+
165
+ /*
166
+ * Make complete (all 3*n) quartet list for this weight matrix.
167
+ */
168
+ struct QuartetList *makeFullQuartetList(const struct DistMatrix *dm, weight_t *worst, weight_t *best)
169
+ {
170
+ int i, j, k, l;
171
+ weight_t wmin, wmax;
172
+ int m;
173
+ int c = dm->size;
174
+ unsigned int curindex = 0;
175
+ // m6.
176
+ struct QuartetList *result = (struct QuartetList *) calloc(sizeof(struct QuartetList), 1);
177
+ result->num = 3 * choose(dm->size, 4);
178
+ // m7.
179
+ result->q = (struct Quartet *) calloc(sizeof(struct Quartet), result->num);
180
+ *worst = *best = 0.0;
181
+ for (i = 0; i < c; ++i)
182
+ for (j = i+1; j < c; ++j)
183
+ for (k = j+1; k < c; ++k)
184
+ for (l = k+1; l < c; ++l) {
185
+ makeAllQuartetsFromLabels(i,j,k,l,dm,result->q+curindex);
186
+ wmin = wmax = result->q[curindex].weight;
187
+ for (m = 1; m < 3; ++m) {
188
+ if (wmin > result->q[curindex+m].weight)
189
+ wmin = result->q[curindex+m].weight;
190
+ if (wmax < result->q[curindex+m].weight)
191
+ wmax = result->q[curindex+m].weight;
192
+ }
193
+ curindex += 3;
194
+ *worst += wmin;
195
+ *best += wmax;
196
+ }
197
+ return result;
198
+ }
199
+
200
+ /*
201
+ * Make a list of just the best quartets for this distance matrix
202
+ * This is 1/3 of the size of the full list
203
+ */
204
+ struct QuartetList *makeBestQuartetList(const struct DistMatrix *dm, weight_t *worst, weight_t *best)
205
+ {
206
+ int i, j, k, l, c;
207
+ weight_t total = 0;
208
+ unsigned int curindex = 0;
209
+ // m8.
210
+ struct QuartetList *result = (struct QuartetList *) calloc(sizeof(struct QuartetList), 1);
211
+ result->num = choose(dm->size, 4);
212
+ // m9.
213
+ result->q = (struct Quartet *) calloc(sizeof(struct Quartet), result->num);
214
+ c = dm->size;
215
+ for (i = 0; i < c; ++i)
216
+ for (j = i+1; j < c; ++j)
217
+ for (k = j+1; k < c; ++k)
218
+ for (l = k+1; l < c; ++l) {
219
+ makeSingleQuartetFromLabels(i,j,k,l,dm,result->q+curindex);
220
+ total += result->q[curindex].weight;
221
+ curindex += 1;
222
+ }
223
+ *worst = 0.0;
224
+ *best = total;
225
+ return result;
226
+ }
227
+
228
+ static qbase_t pop_front(struct NodeList *nl)
229
+ {
230
+ qbase_t result;
231
+ assert(nl->size > 0);
232
+ result = nl->n[0];
233
+ nl->size -= 1;
234
+ memmove(nl->n, nl->n+1, sizeof(nl->n[0])*nl->size);
235
+ return result;
236
+ }
237
+
238
+ void push_back(struct NodeList *nl, qbase_t which)
239
+ {
240
+ nl->n[nl->size] = which;
241
+ nl->size += 1;
242
+ }
243
+
244
+ /*
245
+ * Calculate the shortest path map for a given root node.
246
+ * For a tree of n nodes, the map from r will look like this:
247
+ * result[r] == r (only the root node points to itself)
248
+ * for any other x != r, result[x] is the unique neighboring node whose
249
+ * distance to r is exactly 1 less than x's.
250
+ *
251
+ */
252
+ void shortestPathTree(qbase_t root, const struct NodeTree *tree, struct NodeList *result)
253
+ {
254
+ struct NodeList pending;
255
+ struct NodeList visitted;
256
+ pending.size = visitted.size = 0;
257
+ push_back(&pending, root);
258
+ memset(visitted.n, 0, sizeof(visitted.n[0]) * tree->size);
259
+ visitted.n[root] = 1;
260
+ result->n[root] = root;
261
+ while (pending.size > 0) {
262
+ qbase_t from = pop_front(&pending);
263
+ int i;
264
+ for (i = 0; i < tree->nl[from].size; ++i) {
265
+ qbase_t cur = tree->nl[from].n[i];
266
+ if (!visitted.n[cur]) {
267
+ visitted.n[cur] = 1;
268
+ push_back(&pending, cur);
269
+ result->n[cur] = from;
270
+ }
271
+ }
272
+ }
273
+ }
274
+
275
+ /*
276
+ * Calculate the shortest path tree for all nodes in this tree and
277
+ * return this result as a list of NodeList as in shortestPathTree.
278
+ * That is, result[0] will contain the shortest path map rooted at node 0,
279
+ * result[1] will be rooted at node 1, and so on.
280
+ */
281
+ struct NodeTree *allShortestPathTrees(const struct NodeTree *tree)
282
+ {
283
+ struct NodeTree *result = newNodeTree(tree->size);
284
+ qbase_t i;
285
+ for (i = 0; i < tree->size; ++i) {
286
+ shortestPathTree(i, tree, &result->nl[i]);
287
+ }
288
+ return result;
289
+ }
290
+
291
+ /*
292
+ * Finds the (shortest and only) path starting at a and going to b.
293
+ * Returns this as a list of the following form:
294
+ * a .. n1 .. n2 .. b
295
+ * or if a == b, then
296
+ * a
297
+ */
298
+ void findPath(struct NodeList *result, const struct FastTree *ft, qbase_t a, qbase_t b)
299
+ {
300
+ const struct NodeList *nl = &ft->spm->nl[b];
301
+ result->size = 0;
302
+ while (a != b) {
303
+ push_back(result, a);
304
+ a = nl->n[a];
305
+ }
306
+ push_back(result, a);
307
+ }
308
+
309
+ /*
310
+ * Returns 1 iff the two paths a and b intersect.
311
+ */
312
+ int pathsIntersect(const struct NodeList *a, const struct NodeList *b)
313
+ {
314
+ int i, j;
315
+ for (i = 0; i < a->size; i++)
316
+ for (j = 0; j < b->size; j++)
317
+ if (a->n[i] == b->n[j])
318
+ return 1;
319
+ return 0;
320
+ }
321
+
322
+ int quartetCompatible(const struct Quartet* q, const struct FastTree *ft)
323
+ {
324
+ struct NodeList pa, pb;
325
+ findPath(&pa, ft, q->q[0], q->q[1]);
326
+ findPath(&pb, ft, q->q[2], q->q[3]);
327
+ return !pathsIntersect(&pa,&pb);
328
+ }
329
+
330
+ weight_t calculateWeightedScore(const struct FastTree *ft, const struct QuartetList *ql)
331
+ {
332
+ weight_t acc=0;
333
+ unsigned int i;
334
+ for (i = 0; i < ql->num; ++i) {
335
+ if (quartetCompatible(&ql->q[i], ft))
336
+ acc += ql->q[i].weight;
337
+ }
338
+ return acc;
339
+ }
340
+
341
+ void removeFromList(struct NodeList *nl, qbase_t me)
342
+ {
343
+ int i;
344
+ int oldsize = nl->size;
345
+ nl->size = 0;
346
+ for (i = 0; i < oldsize; ++i) {
347
+ qbase_t cur = nl->n[i];
348
+ if (cur != me)
349
+ push_back(nl, cur);
350
+ }
351
+ }
352
+
353
+ void disconnectNodes(struct FastTree *ft, qbase_t a, qbase_t b)
354
+ {
355
+ removeFromList(&ft->tree->nl[a], b);
356
+ removeFromList(&ft->tree->nl[b], a);
357
+ }
358
+
359
+ void connectNodes(struct FastTree *ft, qbase_t a, qbase_t b)
360
+ {
361
+ push_back(&ft->tree->nl[a], b);
362
+ push_back(&ft->tree->nl[b], a);
363
+ }
364
+
365
+ void freeQuartetList(struct QuartetList *ql)
366
+ {
367
+ // f7. f6. or f9. f8.
368
+ free(ql->q);
369
+ ql->q = NULL;
370
+ free(ql);
371
+ }
372
+
373
+ struct FastTree *newFastTree(int size)
374
+ {
375
+ // m10.
376
+ struct FastTree *result = (struct FastTree *) calloc(sizeof(struct FastTree), 1);
377
+ result->spec = size;
378
+ result->score = 0;
379
+ result->tree = newNodeTree(size);
380
+ result->spm = NULL;
381
+ return result;
382
+ }
383
+
384
+ void freeFastTree(struct FastTree *ft)
385
+ {
386
+ if (ft->tree) {
387
+ freeNodeTree(ft->tree);
388
+ ft->tree = NULL;
389
+ }
390
+ if (ft->spm) {
391
+ freeNodeTree(ft->spm);
392
+ ft->spm = NULL;
393
+ }
394
+ ft->spec = 0;
395
+ ft->score = 0;
396
+ // f10.
397
+ free(ft);
398
+ }
399
+
@@ -0,0 +1,62 @@
1
+ #ifndef __MQUARTET_H
2
+ #define __MQUARTET_H
3
+
4
+ #define MAXNODES 1024
5
+
6
+ #include <sys/types.h>
7
+
8
/*
 * Node label / node index type.
 * NOTE(review): unsigned char cannot name as many nodes as MAXNODES
 * allows in a NodeList -- confirm the intended maximum tree size.
 */
typedef unsigned char qbase_t;

/* Quartet weight and tree score. */
typedef double weight_t;

/* Pairwise distance. */
typedef double dist_t;
11
+
12
+ struct Quartet {
13
+ weight_t weight;
14
+ qbase_t q[4];
15
+ };
16
+
17
+ struct NodeList {
18
+ size_t size;
19
+ qbase_t n[MAXNODES];
20
+ };
21
+
22
/*
 * An array of `size` NodeLists, one per node.
 */
struct NodeTree {
  size_t size;          /* number of NodeLists in nl */
  struct NodeList *nl;  /* heap array holding the per-node lists */
};
26
+
27
/*
 * Square matrix of distances with side length `size`.
 * Entry (r, c) is vals[r][c].
 */
struct DistMatrix {
  size_t size;    /* side length of the matrix */
  double **vals;  /* `size` row pointers, each to `size` values */
};
31
+
32
/*
 * Heap array of `num` quartets.
 */
struct QuartetList {
  size_t num;         /* number of quartets in q */
  struct Quartet *q;  /* the quartets themselves */
};
36
+
37
+ struct FastTree {
38
+ int spec;
39
+ weight_t score;
40
+ struct NodeTree *tree, *spm;
41
+ };
42
+
43
+
44
+ struct FastTree *newFastTree(int size);
45
+ void freeFastTree(struct FastTree *ft);
46
+ struct NodeList *newNodeList(int size);
47
+ struct NodeTree *newNodeTree(int size);
48
+ void freeNodeList(struct NodeList *nl);
49
+ void freeNodeTree(struct NodeTree *nt);
50
+ struct DistMatrix *newDistMatrix(int size);
51
+ void freeDistMatrix(struct DistMatrix *dm);
52
+ struct NodeTree *allShortestPathTrees(const struct NodeTree *tree);
53
+ weight_t calculateWeightedScore(const struct FastTree *ft, const struct QuartetList *ql);
54
+ struct FastTree *makeMutatedCopy(const struct FastTree *ft);
55
+ void findPath(struct NodeList *result, const struct FastTree *ft, qbase_t a, qbase_t b);
56
+ struct QuartetList *makeFullQuartetList(const struct DistMatrix *dm, weight_t *worst, weight_t *best);
57
+ struct QuartetList *makeBestQuartetList(const struct DistMatrix *dm, weight_t *worst, weight_t *best);
58
+ void freeQuartetList(struct QuartetList *ql);
59
+ void push_back(struct NodeList *nl, qbase_t which);
60
+
61
+ #endif
62
+