complearn 0.6.2
Sign up to get free protection for your applications and to get access to all the features.
- data/AUTHORS +13 -0
- data/COPYING +340 -0
- data/ChangeLog +0 -0
- data/INSTALL +231 -0
- data/Makefile +352 -0
- data/Makefile.am +76 -0
- data/Makefile.in +352 -0
- data/NEWS +7 -0
- data/README +0 -0
- data/aclocal.m4 +104 -0
- data/bin/Makefile +209 -0
- data/bin/Makefile.am +8 -0
- data/bin/Makefile.in +209 -0
- data/bin/labeltree +68 -0
- data/bin/labeltree.in +68 -0
- data/bin/makesvm +70 -0
- data/bin/makesvm.in +70 -0
- data/bin/maketree +98 -0
- data/bin/maketree.in +98 -0
- data/bin/ncd +43 -0
- data/bin/ncd.in +43 -0
- data/bin/ncdmatrix +54 -0
- data/bin/ncdmatrix.in +54 -0
- data/bin/ncdvector +50 -0
- data/bin/ncdvector.in +50 -0
- data/complearn-0.6.2.gem +0 -0
- data/complearn.gemspec +57 -0
- data/config.log +597 -0
- data/config.status +1082 -0
- data/configure +4922 -0
- data/configure.ac +91 -0
- data/confstat5FpLBf/config.h +65 -0
- data/confstat5FpLBf/subs-1.sed +50 -0
- data/confstat5FpLBf/subs-2.sed +13 -0
- data/confstat5FpLBf/subs.frag +0 -0
- data/confstat5FpLBf/subs.sed +59 -0
- data/confstat5FpLBf/undefs.sed +24 -0
- data/doc/FAQ.txt +67 -0
- data/doc/Makefile +286 -0
- data/doc/Makefile.am +11 -0
- data/doc/Makefile.in +286 -0
- data/doc/devguide.txt +15 -0
- data/doc/example.complearnrc +14 -0
- data/doc/examples.txt +35 -0
- data/doc/man/Makefile +255 -0
- data/doc/man/Makefile.am +11 -0
- data/doc/man/Makefile.in +255 -0
- data/doc/man/complearn.5 +91 -0
- data/doc/man/labeltree.1 +35 -0
- data/doc/man/makesvm.1 +60 -0
- data/doc/man/maketree.1 +58 -0
- data/doc/man/ncd.1 +51 -0
- data/doc/man/ncdmatrix.1 +40 -0
- data/doc/man/ncdvector.1 +42 -0
- data/doc/readme.txt +101 -0
- data/doc/userguide.txt +46 -0
- data/examples/genes/blueWhale.txt +1 -0
- data/examples/genes/cat.txt +1 -0
- data/examples/genes/chimpanzee.txt +1 -0
- data/examples/genes/finWhale.txt +1 -0
- data/examples/genes/graySeal.txt +1 -0
- data/examples/genes/harborSeal.txt +1 -0
- data/examples/genes/horse.txt +1 -0
- data/examples/genes/human.txt +1 -0
- data/examples/genes/mouse.txt +1 -0
- data/examples/genes/rat.txt +1 -0
- data/ext/Makefile +167 -0
- data/ext/Quartet.c +399 -0
- data/ext/Quartet.h +62 -0
- data/ext/TreeScore.c +244 -0
- data/ext/TreeScore.h +3 -0
- data/ext/config.h +65 -0
- data/ext/config.h.in +64 -0
- data/ext/extconf.rb +3 -0
- data/ext/lib/CompLearnLib/CLConfig.rb +241 -0
- data/ext/lib/CompLearnLib/CompressionObject.rb +59 -0
- data/ext/lib/CompLearnLib/CompressionTask.rb +99 -0
- data/ext/lib/CompLearnLib/DistMatrix.rb +18 -0
- data/ext/lib/CompLearnLib/FoundComp.rb +10 -0
- data/ext/lib/CompLearnLib/FoundComp.rb.in +10 -0
- data/ext/lib/CompLearnLib/Ncd.rb +248 -0
- data/ext/lib/CompLearnLib/RunEnv.rb +150 -0
- data/ext/lib/CompLearnLib/Task.rb +39 -0
- data/ext/lib/CompLearnLib/TaskMaster.rb +13 -0
- data/ext/lib/CompLearnLib/TaskMasterMPI.rb +112 -0
- data/ext/lib/CompLearnLib/TaskMasterSingle.rb +39 -0
- data/ext/lib/CompLearnLib/Tree.rb +300 -0
- data/install-sh +294 -0
- data/missing +336 -0
- data/mkinstalldirs +111 -0
- data/o +24 -0
- data/scripts/CompLearn.iss +89 -0
- data/scripts/CompLearn.iss.in +89 -0
- data/scripts/debian/changelog +6 -0
- data/scripts/debian/control +14 -0
- data/scripts/makeSetup.sh +23 -0
- data/scripts/makeSetup.sh.in +23 -0
- data/scripts/makedeb.zsh +46 -0
- data/scripts/makedeb.zsh.in +46 -0
- data/tests/alltests.rb +2 -0
- data/tests/bz2test.rb +516 -0
- data/tests/sshagent-test.rb +48 -0
- data/tests/tests.rb +275 -0
- metadata +164 -0
data/ext/TreeScore.c
ADDED
@@ -0,0 +1,244 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include "TreeScore.h"
|
3
|
+
#include "Quartet.h"
|
4
|
+
#include <malloc.h>
|
5
|
+
#include "ruby.h"
|
6
|
+
|
7
|
+
#ifndef VALUEFUNC
|
8
|
+
#define VALUEFUNC(x) RUBY_METHOD_FUNC(x)
|
9
|
+
#endif
|
10
|
+
|
11
|
+
static VALUE cTreeScore;
|
12
|
+
|
13
|
+
/*
|
14
|
+
* Holds a list of quartets with associated weights.
|
15
|
+
* Keeps track of worst and best possible scores using this list.
|
16
|
+
*/
|
17
|
+
struct TreeScore {
|
18
|
+
weight_t worst, best;
|
19
|
+
weight_t penalty;
|
20
|
+
struct QuartetList *ql;
|
21
|
+
};
|
22
|
+
|
23
|
+
/*
|
24
|
+
* Allocate a new TreeScore
|
25
|
+
*/
|
26
|
+
struct TreeScore *newTreeScore()
|
27
|
+
{
|
28
|
+
// m11.
|
29
|
+
struct TreeScore *result = calloc(sizeof(struct TreeScore), 1);
|
30
|
+
result->penalty = 0.0;
|
31
|
+
return result;
|
32
|
+
}
|
33
|
+
/*
 * Release callback for a wrapped TreeScore: this is the function handed
 * to Data_Wrap_Struct in ts_makeBest and ts_makeFull. It forwards the
 * untyped pointer to freeTreeScore.
 */
static void ts_free(void *vts)
{
  struct TreeScore *doomed = (struct TreeScore *) vts;
  freeTreeScore(doomed);
}
|
40
|
+
|
41
|
+
void freeTreeScore(struct TreeScore *ts)
|
42
|
+
{
|
43
|
+
// f11.
|
44
|
+
freeQuartetList(ts->ql);
|
45
|
+
ts->ql = 0;
|
46
|
+
free(ts);
|
47
|
+
}
|
48
|
+
|
49
|
+
|
50
|
+
/*
|
51
|
+
* Convert a Ruby Tree into a C/C++ FastTree pointer
|
52
|
+
*/
|
53
|
+
struct FastTree *convertTreeFromRuby(VALUE tree)
|
54
|
+
{
|
55
|
+
VALUE edges = rb_iv_get(tree, "@edges");
|
56
|
+
int translation[MAXNODES];
|
57
|
+
int nodeCount = RARRAY(edges)->len;
|
58
|
+
int speciesCount = (nodeCount + 2) / 2;
|
59
|
+
struct FastTree *ft = newFastTree(nodeCount);
|
60
|
+
int currentKernel=0, currentSpecies=0;
|
61
|
+
int i, j;
|
62
|
+
ft->spec = speciesCount;
|
63
|
+
for (i = 0; i < nodeCount; ++i) {
|
64
|
+
VALUE neighbors = rb_ary_entry(edges, i);
|
65
|
+
int ns = RARRAY(neighbors)->len;
|
66
|
+
switch (ns) {
|
67
|
+
case 1:
|
68
|
+
translation[i] = currentSpecies++;
|
69
|
+
break;
|
70
|
+
case 3:
|
71
|
+
translation[i] = speciesCount + currentKernel++;
|
72
|
+
break;
|
73
|
+
default:
|
74
|
+
assert("Bad tree!" && 0);
|
75
|
+
}
|
76
|
+
}
|
77
|
+
for (i = 0; i < nodeCount; ++i) {
|
78
|
+
VALUE neighbors = rb_ary_entry(edges, i);
|
79
|
+
int ns = RARRAY(neighbors)->len;
|
80
|
+
for (j = 0; j < ns; ++j) {
|
81
|
+
VALUE curn = rb_ary_entry(neighbors, j);
|
82
|
+
push_back(&ft->tree->nl[translation[i]], translation[NUM2INT(curn)]);
|
83
|
+
}
|
84
|
+
}
|
85
|
+
ft->spm = allShortestPathTrees(ft->tree);
|
86
|
+
return ft;
|
87
|
+
}
|
88
|
+
|
89
|
+
/*
|
90
|
+
* Convert a Ruby 2-dimensional array into a C/C++ DistMatrix
|
91
|
+
*/
|
92
|
+
struct DistMatrix *convertDistMatrixFromRuby(VALUE dm)
|
93
|
+
{
|
94
|
+
int size = RARRAY(dm)->len;
|
95
|
+
struct DistMatrix *rdm = newDistMatrix(size);
|
96
|
+
int i, j;
|
97
|
+
for (i = 0; i < size; ++i) {
|
98
|
+
for (j = 0; j < size; ++j) {
|
99
|
+
weight_t val = NUM2DBL(rb_ary_entry(rb_ary_entry(dm, i),j));
|
100
|
+
rdm->vals[i][j] = val;
|
101
|
+
}
|
102
|
+
}
|
103
|
+
return rdm;
|
104
|
+
}
|
105
|
+
|
106
|
+
/*
|
107
|
+
* Makes a list consisting of only the best quartets (1/3 as many as full)
|
108
|
+
*/
|
109
|
+
static VALUE ts_makeBest(VALUE cl, VALUE dm)
|
110
|
+
{
|
111
|
+
struct TreeScore *ts = newTreeScore();
|
112
|
+
VALUE tdata = Data_Wrap_Struct(cl, 0, ts_free, ts);
|
113
|
+
struct DistMatrix *gdm = convertDistMatrixFromRuby(dm);
|
114
|
+
ts->ql = makeBestQuartetList(gdm, &ts->worst, &ts->best);
|
115
|
+
freeDistMatrix(gdm);
|
116
|
+
return tdata;
|
117
|
+
}
|
118
|
+
|
119
|
+
/*
|
120
|
+
* Makes a list of every quartet with corresponding cost.
|
121
|
+
*/
|
122
|
+
static VALUE ts_makeFull(VALUE cl, VALUE dm)
|
123
|
+
{
|
124
|
+
struct TreeScore *ts = newTreeScore();
|
125
|
+
VALUE tdata = Data_Wrap_Struct(cl, 0, ts_free, ts);
|
126
|
+
struct DistMatrix *gdm = convertDistMatrixFromRuby(dm);
|
127
|
+
ts->ql = makeFullQuartetList(gdm, &ts->worst, &ts->best);
|
128
|
+
freeDistMatrix(gdm);
|
129
|
+
return tdata;
|
130
|
+
}
|
131
|
+
|
132
|
+
/*
|
133
|
+
* Return the worst possible tree cost total. This is used to calculate S(T).
|
134
|
+
*/
|
135
|
+
static VALUE ts_worst(VALUE self)
|
136
|
+
{
|
137
|
+
struct TreeScore *ts;
|
138
|
+
Data_Get_Struct(self, struct TreeScore, ts);
|
139
|
+
return rb_float_new(ts->worst);
|
140
|
+
}
|
141
|
+
|
142
|
+
/*
|
143
|
+
* Return the best possible tree cost total. This is used to calculate S(T).
|
144
|
+
*/
|
145
|
+
static VALUE ts_best(VALUE self)
|
146
|
+
{
|
147
|
+
struct TreeScore *ts;
|
148
|
+
Data_Get_Struct(self, struct TreeScore, ts);
|
149
|
+
return rb_float_new(ts->best);
|
150
|
+
}
|
151
|
+
|
152
|
+
/*
|
153
|
+
* Returns the number of neighbors who are leaves to a given node
|
154
|
+
*/
|
155
|
+
int countLeafNeighbors(const struct FastTree *ft, qbase_t which)
|
156
|
+
{
|
157
|
+
int nc = ft->tree->nl[which].size;
|
158
|
+
int i;
|
159
|
+
int leafCount = 0;
|
160
|
+
for (i = 0; i < nc; ++i)
|
161
|
+
if (ft->tree->nl[ft->tree->nl[which].n[i]].size == 1)
|
162
|
+
leafCount++;
|
163
|
+
return leafCount;
|
164
|
+
}
|
165
|
+
|
166
|
+
/*
|
167
|
+
* Returns the number of nodes that are unpaired
|
168
|
+
*/
|
169
|
+
int countBadNodes(const struct FastTree *ft)
|
170
|
+
{
|
171
|
+
int i;
|
172
|
+
int badCount = 0;
|
173
|
+
for (i = ft->spec; i < ft->tree->size; ++i)
|
174
|
+
if (countLeafNeighbors(ft, i) == 1)
|
175
|
+
badCount++;
|
176
|
+
return badCount;
|
177
|
+
}
|
178
|
+
|
179
|
+
/*
 * Ruby method "penalty=": store val, converted with NUM2DBL, as the
 * penalty and return the stored value as a Float.
 */
static VALUE ts_penaltyeq(VALUE self, VALUE val)
{
  struct TreeScore *scorer;
  VALUE stored;

  Data_Get_Struct(self, struct TreeScore, scorer);
  scorer->penalty = NUM2DBL(val);
  stored = rb_float_new(scorer->penalty);
  return stored;
}
|
186
|
+
|
187
|
+
/*
 * Ruby method "penalty": the current penalty as a Float.
 */
static VALUE ts_penalty(VALUE self)
{
  struct TreeScore *scorer;
  VALUE current;

  Data_Get_Struct(self, struct TreeScore, scorer);
  current = rb_float_new(scorer->penalty);
  return current;
}
|
193
|
+
|
194
|
+
/*
 * Debugging aid: print every node of a FastTree with its neighbors.
 * (A tree's S(T) score, between 0.0 (worst) and 1.0 (best), is returned
 * by ts_score below; the penalty may force it outside that range.)
 */
|
197
|
+
void printFastTree(struct FastTree *ft)
|
198
|
+
{
|
199
|
+
int i, j;
|
200
|
+
printf("Tree size: %d\n", ft->tree->size);
|
201
|
+
for (i = 0; i < ft->tree->size; ++i) {
|
202
|
+
const struct NodeList *cur = &ft->tree->nl[i];
|
203
|
+
printf("Node %d: %d neighbors, %d leaf-neighbors: ", i, ft->tree->nl[i].size, countLeafNeighbors(ft, i));
|
204
|
+
for (j = 0; j < cur->size; ++j) {
|
205
|
+
printf("%d->%d, ", i, cur->n[j]);
|
206
|
+
}
|
207
|
+
printf("\n");
|
208
|
+
}
|
209
|
+
printf("\n");
|
210
|
+
}
|
211
|
+
|
212
|
+
/*
 * Ruby method "score": return a tree's S(T) score.
 *
 * The weighted quartet score of the tree is rescaled so that the stored
 * worst total maps to 0.0 and the stored best total maps to 1.0; the
 * penalty multiplied by the number of bad (unpaired) nodes is then
 * subtracted, which may push the result outside that range.
 */
static VALUE ts_score(VALUE self, VALUE tree)
{
  int badNodeCount;
  struct TreeScore *ts;
  struct FastTree *ft;
  weight_t score;
  float result, radj;

  /*
   * Unwrap self before converting the tree: Data_Get_Struct can raise,
   * and raising after the conversion would leak the FastTree.
   */
  Data_Get_Struct(self, struct TreeScore, ts);
  ft = convertTreeFromRuby(tree);
  score = calculateWeightedScore(ft, ts->ql);
  result = (score - ts->worst) / (ts->best - ts->worst);
  /* count once and reuse the result */
  badNodeCount = countBadNodes(ft);
  radj = - ts->penalty * badNodeCount;
  freeFastTree(ft);
  return rb_float_new(result + radj);
}
|
227
|
+
|
228
|
+
void initTreeScore()
|
229
|
+
{
|
230
|
+
cTreeScore = rb_define_class("TreeScore", rb_cObject);
|
231
|
+
rb_define_singleton_method(cTreeScore, "makeBestList", VALUEFUNC(ts_makeBest), 1);
|
232
|
+
rb_define_singleton_method(cTreeScore, "makeFullList", VALUEFUNC(ts_makeFull), 1);
|
233
|
+
rb_define_method(cTreeScore, "penalty", VALUEFUNC(ts_penalty), 0);
|
234
|
+
rb_define_method(cTreeScore, "penalty=", VALUEFUNC(ts_penaltyeq), 1);
|
235
|
+
rb_define_method(cTreeScore, "score", VALUEFUNC(ts_score), 1);
|
236
|
+
rb_define_method(cTreeScore, "worst", VALUEFUNC(ts_worst), 0);
|
237
|
+
rb_define_method(cTreeScore, "best", VALUEFUNC(ts_best), 0);
|
238
|
+
}
|
239
|
+
|
240
|
+
/*
 * Extension initializer: registers the TreeScore class.
 * NOTE(review): presumably invoked by Ruby when the CompLearn extension
 * is loaded -- confirm against the build setup.
 */
void Init_CompLearn()
{
  initTreeScore();
  return;
}
|
244
|
+
|
data/ext/TreeScore.h
ADDED
data/ext/config.h
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
/* ext/config.h. Generated by configure. */
|
2
|
+
/* ext/config.h.in. Generated from configure.ac by autoheader. */
|
3
|
+
|
4
|
+
/* Define to 1 if you have the <inttypes.h> header file. */
|
5
|
+
#define HAVE_INTTYPES_H 1
|
6
|
+
|
7
|
+
/* Define to 1 if you have the <memory.h> header file. */
|
8
|
+
#define HAVE_MEMORY_H 1
|
9
|
+
|
10
|
+
/* Define to 1 if stdbool.h conforms to C99. */
|
11
|
+
/* #undef HAVE_STDBOOL_H */
|
12
|
+
|
13
|
+
/* Define to 1 if you have the <stdint.h> header file. */
|
14
|
+
#define HAVE_STDINT_H 1
|
15
|
+
|
16
|
+
/* Define to 1 if you have the <stdlib.h> header file. */
|
17
|
+
#define HAVE_STDLIB_H 1
|
18
|
+
|
19
|
+
/* Define to 1 if you have the <strings.h> header file. */
|
20
|
+
#define HAVE_STRINGS_H 1
|
21
|
+
|
22
|
+
/* Define to 1 if you have the <string.h> header file. */
|
23
|
+
#define HAVE_STRING_H 1
|
24
|
+
|
25
|
+
/* Define to 1 if you have the <sys/stat.h> header file. */
|
26
|
+
#define HAVE_SYS_STAT_H 1
|
27
|
+
|
28
|
+
/* Define to 1 if you have the <sys/time.h> header file. */
|
29
|
+
#define HAVE_SYS_TIME_H 1
|
30
|
+
|
31
|
+
/* Define to 1 if you have the <sys/types.h> header file. */
|
32
|
+
#define HAVE_SYS_TYPES_H 1
|
33
|
+
|
34
|
+
/* Define to 1 if you have the <unistd.h> header file. */
|
35
|
+
#define HAVE_UNISTD_H 1
|
36
|
+
|
37
|
+
/* Define to 1 if the system has the type `_Bool'. */
|
38
|
+
/* #undef HAVE__BOOL */
|
39
|
+
|
40
|
+
/* Name of package */
|
41
|
+
#define PACKAGE "complearn"
|
42
|
+
|
43
|
+
/* Define to the address where bug reports for this package should be sent. */
|
44
|
+
#define PACKAGE_BUGREPORT ""
|
45
|
+
|
46
|
+
/* Define to the full name of this package. */
|
47
|
+
#define PACKAGE_NAME ""
|
48
|
+
|
49
|
+
/* Define to the full name and version of this package. */
|
50
|
+
#define PACKAGE_STRING ""
|
51
|
+
|
52
|
+
/* Define to the one symbol short name of this package. */
|
53
|
+
#define PACKAGE_TARNAME ""
|
54
|
+
|
55
|
+
/* Define to the version of this package. */
|
56
|
+
#define PACKAGE_VERSION ""
|
57
|
+
|
58
|
+
/* Define to 1 if you have the ANSI C header files. */
|
59
|
+
#define STDC_HEADERS 1
|
60
|
+
|
61
|
+
/* Version number of package */
|
62
|
+
#define VERSION "0.6.2"
|
63
|
+
|
64
|
+
/* Define to empty if `const' does not conform to ANSI C. */
|
65
|
+
/* #undef const */
|
data/ext/config.h.in
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
/* ext/config.h.in. Generated from configure.ac by autoheader. */
|
2
|
+
|
3
|
+
/* Define to 1 if you have the <inttypes.h> header file. */
|
4
|
+
#undef HAVE_INTTYPES_H
|
5
|
+
|
6
|
+
/* Define to 1 if you have the <memory.h> header file. */
|
7
|
+
#undef HAVE_MEMORY_H
|
8
|
+
|
9
|
+
/* Define to 1 if stdbool.h conforms to C99. */
|
10
|
+
#undef HAVE_STDBOOL_H
|
11
|
+
|
12
|
+
/* Define to 1 if you have the <stdint.h> header file. */
|
13
|
+
#undef HAVE_STDINT_H
|
14
|
+
|
15
|
+
/* Define to 1 if you have the <stdlib.h> header file. */
|
16
|
+
#undef HAVE_STDLIB_H
|
17
|
+
|
18
|
+
/* Define to 1 if you have the <strings.h> header file. */
|
19
|
+
#undef HAVE_STRINGS_H
|
20
|
+
|
21
|
+
/* Define to 1 if you have the <string.h> header file. */
|
22
|
+
#undef HAVE_STRING_H
|
23
|
+
|
24
|
+
/* Define to 1 if you have the <sys/stat.h> header file. */
|
25
|
+
#undef HAVE_SYS_STAT_H
|
26
|
+
|
27
|
+
/* Define to 1 if you have the <sys/time.h> header file. */
|
28
|
+
#undef HAVE_SYS_TIME_H
|
29
|
+
|
30
|
+
/* Define to 1 if you have the <sys/types.h> header file. */
|
31
|
+
#undef HAVE_SYS_TYPES_H
|
32
|
+
|
33
|
+
/* Define to 1 if you have the <unistd.h> header file. */
|
34
|
+
#undef HAVE_UNISTD_H
|
35
|
+
|
36
|
+
/* Define to 1 if the system has the type `_Bool'. */
|
37
|
+
#undef HAVE__BOOL
|
38
|
+
|
39
|
+
/* Name of package */
|
40
|
+
#undef PACKAGE
|
41
|
+
|
42
|
+
/* Define to the address where bug reports for this package should be sent. */
|
43
|
+
#undef PACKAGE_BUGREPORT
|
44
|
+
|
45
|
+
/* Define to the full name of this package. */
|
46
|
+
#undef PACKAGE_NAME
|
47
|
+
|
48
|
+
/* Define to the full name and version of this package. */
|
49
|
+
#undef PACKAGE_STRING
|
50
|
+
|
51
|
+
/* Define to the one symbol short name of this package. */
|
52
|
+
#undef PACKAGE_TARNAME
|
53
|
+
|
54
|
+
/* Define to the version of this package. */
|
55
|
+
#undef PACKAGE_VERSION
|
56
|
+
|
57
|
+
/* Define to 1 if you have the ANSI C header files. */
|
58
|
+
#undef STDC_HEADERS
|
59
|
+
|
60
|
+
/* Version number of package */
|
61
|
+
#undef VERSION
|
62
|
+
|
63
|
+
/* Define to empty if `const' does not conform to ANSI C. */
|
64
|
+
#undef const
|
data/ext/extconf.rb
ADDED
@@ -0,0 +1,241 @@
|
|
1
|
+
#
|
2
|
+
# CLConfig
|
3
|
+
#
|
4
|
+
# Reads a given configuration file of the following form:
|
5
|
+
#
|
6
|
+
# Compressor: [BZ2,..]
|
7
|
+
# InputDir: [input directory]
|
8
|
+
# OutputDir: [output directory]
|
9
|
+
# WorkDir: [working directory]
|
10
|
+
# Symmetric: [yes/no]
|
11
|
+
# Hosts: [list of hosts, separated by ,'s]
|
12
|
+
#
|
13
|
+
# Comments in the configuration file are prefixed with #
|
14
|
+
# individual values are accessed by configObj.[value]
|
15
|
+
# Note: this will exit if an error is encountered
|
16
|
+
#
|
17
|
+
# $Id: CLConfig.rb,v 1.5 2003/11/27 16:13:46 cilibrar Exp $
|
18
|
+
#
|
19
|
+
|
20
|
+
require 'yaml'
|
21
|
+
require 'CompLearnLib/FoundComp.rb'
|
22
|
+
|
23
|
+
class CLConfig

  # The allowed config variables

  @@BASECONFIGVARS =
  [ # prettyName isArray Type Default Value
    [ 'Compressor' , false, String, FoundComp.defaultCompressor() ],
    [ 'CompressorCommand' , false, String, 'gzip -c -' ],
    [ 'InputDir' , false, String, 'in' ],
    [ 'OutputDir' , false, String, 'out' ],
    [ 'WorkDir' , false, String, 'work' ],
    [ 'Symmetric' , false, TrueClass, true ],
    [ 'Hosts' , true, String, ['localhost'] ],
    [ 'SingleProcess' , false, TrueClass, true ],
    # maketree
    [ 'UnpairedPenalty',false, Float, 0.0 ],
    [ 'UseBestThirdOnly', false, TrueClass, false ],
    [ 'MaxFailedTries', false, Integer, 100 ],
    [ 'TreesPerTry', false, Integer, 1000 ],
    [ 'InternalNodePrefix', false, String, 'n' ],
  ]

  # Truthy once setSingleUser has been called.
  def self.singleUser?()
    defined?(@@SINGLEUSER)
  end

  # Switch to single-user mode; hosts() then reports only the first host.
  def self.setSingleUser()
    @@SINGLEUSER = true
  end

  # Install cfg as the configuration returned by getDefaultConfig.
  def self.setDefaultConfig(cfg)
    @@DEFCON = cfg
  end

  # A class-method (since self is in class/module-scope, it refers to
  # the Class object CLConfig). This one just tries to find a configuration
  # file in a default spot, either in $HOME or /etc; when neither file
  # exists, a configuration holding only the default values is used.
  # Also, this method caches the first read config object as a Singleton.
  def self.getDefaultConfig
    unless defined?(@@DEFCON)
      homedir = ENV['HOME'] || '/home'
      filename = 'complearnrc'
      goodcon = nil
      [ "#{homedir}/.#{filename}", "/etc/#{filename}" ].each { |pathname|
        next unless File.exist?(pathname)
        goodcon = CLConfig.new(pathname)
        break if goodcon
      }
      goodcon = CLConfig.new(nil) unless goodcon
      @@DEFCON = goodcon
    end
    @@DEFCON
  end

  # Maps the lower-cased option name to
  # [ isArray, type, default value, prettyName, methodName ].
  @@CONFIGVARS = { }
  @@BASECONFIGVARS.each { | prettyName, isArray, typ, defval |
    lowerName = prettyName.downcase
    methodName = prettyName[0..0].downcase + prettyName[1..-1]
    # accessing methods
    if methodName == 'hosts'
      # Only the first host is reported when '-s' was given on the
      # command line or single-user mode is on.
      def hosts() (ARGV.include?('-s') || CLConfig.singleUser?) ?
        [@hosts[0]] : @hosts
      end
    else
      module_eval "def #{methodName}() @#{methodName} end"
    end
    # boolean options additionally get an is<PrettyName>? predicate
    module_eval "def is#{prettyName}?() #{methodName} end" if typ == TrueClass
    @@CONFIGVARS[lowerName] = [ isArray, typ, defval, prettyName, methodName ]
  }

  #
  # constructor, read file
  #
  # Every option starts at its default value; when filename is given,
  # the values found in that file replace the defaults.
  #
  def initialize(filename)
    @@CONFIGVARS.each_value { |a|
      methodName, defVal = a[4], a[2]
      instance_variable_set("@#{methodName}", defVal)
    }
    if filename
      @filename = filename
      readCLConfig()
    end
  end

  # Write a YAML file at fname that maps every option's pretty name to
  # its default value.
  def self.writeDefaultConfigFile(fname)
    cfgmap = { }
    @@CONFIGVARS.each { |lowername, stuff|
      isArray, typ, defval, prettyName, methodName = stuff
      cfgmap[prettyName] = defval
    }
    File.open(fname, "w") { |f|
      f.write(cfgmap.to_yaml)
    }
  end

  #
  # read in the configuration file
  #
  # Prints a message and exits with status 1 when the file cannot be read
  # or parsed; raises on an unknown option name.
  #
  def readCLConfig()
    begin
      # the block form closes the file even when parsing raises
      yamlcfg = File.open(@filename, "r") { |f| YAML::parse(f.read) }
    rescue
      # interpolate: $! is an exception object, not a String
      print "Unable to read config file #{@filename}: #{$!}"
      print "\n"
      exit(1)
    end

    #fail "@filename must be a map not a #{yamlcfg.type_id}" unless yamlcfg.type_id.to_s == 'map'

    # NOTE(review): relies on the node layout YAML::parse returns
    # (value / [1].value) -- confirm against the YAML library in use.
    yamlcfg.value.each { |itagname, whatnot|
      tagname, value = itagname.downcase, whatnot[1].value
      if @@CONFIGVARS.has_key?(tagname)
        isArray,typ,default, pretty, methodName = @@CONFIGVARS[tagname]
        if isArray
          result = value.map { |i| parseValue(typ, i.value) }
        else
          result = parseValue(typ, value)
        end
        instance_variable_set("@#{methodName}", result)
      else
        fail "Unknown configuration option: #{itagname}"
      end
    }
  end

  # Convert the text val to the option type typ (TrueClass, String,
  # Float or Integer). Prints a message and exits with status 1 for any
  # other type.
  def parseValue(typ, val)
    val = val.clone
    # remove whitespace
    val.gsub!(/^\s+/,'')
    val.gsub!(/\s+$/,'')
    case typ.to_s # to_s necessary because === checks is_a? for Class

    when 'TrueClass'
      # a real boolean, like the declared defaults, not a match index or nil
      return (val =~ /true/i || val =~ /yes/i || val =~ /on/i) ? true : false

    when 'String'
      return val

    when 'Float'
      return val.to_f

    when 'Integer'
      return val.to_i

    else
      puts "Illegal type: #{typ} for value #{val}"
      exit(1)

    end

  end

  # Searches for a file or directory specified by fname
  # if fname is relative, it will search for it in the following
  # order:
  # inputDir specified in this CLConfig object
  # current working directory
  #
  # If found, a string is returned with the absolute (full) pathname.
  # If not, an exception is raised
  def findInputFile(fname)
    raise "fname can not be nil" if fname == nil
    maybe = File.expand_path(fname, self.inputDir)
    return maybe if File.exist?(maybe)
    maybe = File.expand_path(fname)
    return maybe if File.exist?(maybe)
    raise "Cannot find file to open: #{fname}"
  end

  # Read a tagged file list from fname. Each useful line has the form
  # "<number> <type letter> <filename>"; comment lines, blank lines and
  # lines of any other shape are skipped. Filenames are resolved with
  # findInputFile.
  #
  # Returns [ training, features, testing ]: type 'p' entries go to
  # testing and any type other than 'p' or 'f' goes to training, both as
  # [ number, path ] pairs; type 'f' paths go to features.
  def readTaggedFileList(fname)
    training = [ ]
    features = [ ]
    testing = [ ]
    File.open(fname, 'r') { |f|
      while line = f.gets
        line.chomp!
        next if line =~ /^\s*#/
        next unless line =~ /[\S]/
        if line =~ /^\s*(\S+)\s+(\S)\s+([^\s#]+)/
          # separate name, so the fname parameter is not overwritten
          tagnum, tagtype, entryName = $1.to_i, $2.downcase, $3
          realfname = findInputFile(entryName)
          if tagtype == 'p'
            testing << [ tagnum, realfname ]
          elsif tagtype == 'f'
            features << realfname
          else # tagtype == 'g'
            training << [ tagnum, realfname ]
          end
        end
      end
    }
    return [ training, features, testing ]
  end

  # When objname is a directory, return the sorted list of the regular
  # files directly inside it; otherwise treat objname as a file list and
  # return what readFileList returns for it.
  def getFilelistFromDirOrFile(objname)
    files = [ ]
    if File.ftype(objname) == 'directory'
      Dir.foreach(objname) { |f|
        goodf = "#{objname}/#{f}"
        files << goodf if File.ftype(goodf) == 'file'
      }
      files.sort!
    else
      files = readFileList(objname)
    end
    files
  end

  # Read a plain file list: one filename per line, skipping blank lines
  # and lines starting with '#'. Returns the names resolved with
  # findInputFile, in file order.
  def readFileList(fname)
    result = [ ]
    File.open(fname, 'r') { |f|
      while line = f.gets
        line.chomp!
        next if line =~ /^#/
        next unless line =~ /[\S]/
        realfname = findInputFile(line)
        result << realfname
      end
    }
    result
  end

  # Print the CompLearn version and exit with status 0.
  def self.printVersionAndExit()
    puts "CompLearn #{FoundComp::VERSION}"
    exit(0)
  end
end
|