complearn 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS +13 -0
- data/COPYING +340 -0
- data/ChangeLog +0 -0
- data/INSTALL +231 -0
- data/Makefile +352 -0
- data/Makefile.am +76 -0
- data/Makefile.in +352 -0
- data/NEWS +7 -0
- data/README +0 -0
- data/aclocal.m4 +104 -0
- data/bin/Makefile +209 -0
- data/bin/Makefile.am +8 -0
- data/bin/Makefile.in +209 -0
- data/bin/labeltree +68 -0
- data/bin/labeltree.in +68 -0
- data/bin/makesvm +70 -0
- data/bin/makesvm.in +70 -0
- data/bin/maketree +98 -0
- data/bin/maketree.in +98 -0
- data/bin/ncd +43 -0
- data/bin/ncd.in +43 -0
- data/bin/ncdmatrix +54 -0
- data/bin/ncdmatrix.in +54 -0
- data/bin/ncdvector +50 -0
- data/bin/ncdvector.in +50 -0
- data/complearn-0.6.2.gem +0 -0
- data/complearn.gemspec +57 -0
- data/config.log +597 -0
- data/config.status +1082 -0
- data/configure +4922 -0
- data/configure.ac +91 -0
- data/confstat5FpLBf/config.h +65 -0
- data/confstat5FpLBf/subs-1.sed +50 -0
- data/confstat5FpLBf/subs-2.sed +13 -0
- data/confstat5FpLBf/subs.frag +0 -0
- data/confstat5FpLBf/subs.sed +59 -0
- data/confstat5FpLBf/undefs.sed +24 -0
- data/doc/FAQ.txt +67 -0
- data/doc/Makefile +286 -0
- data/doc/Makefile.am +11 -0
- data/doc/Makefile.in +286 -0
- data/doc/devguide.txt +15 -0
- data/doc/example.complearnrc +14 -0
- data/doc/examples.txt +35 -0
- data/doc/man/Makefile +255 -0
- data/doc/man/Makefile.am +11 -0
- data/doc/man/Makefile.in +255 -0
- data/doc/man/complearn.5 +91 -0
- data/doc/man/labeltree.1 +35 -0
- data/doc/man/makesvm.1 +60 -0
- data/doc/man/maketree.1 +58 -0
- data/doc/man/ncd.1 +51 -0
- data/doc/man/ncdmatrix.1 +40 -0
- data/doc/man/ncdvector.1 +42 -0
- data/doc/readme.txt +101 -0
- data/doc/userguide.txt +46 -0
- data/examples/genes/blueWhale.txt +1 -0
- data/examples/genes/cat.txt +1 -0
- data/examples/genes/chimpanzee.txt +1 -0
- data/examples/genes/finWhale.txt +1 -0
- data/examples/genes/graySeal.txt +1 -0
- data/examples/genes/harborSeal.txt +1 -0
- data/examples/genes/horse.txt +1 -0
- data/examples/genes/human.txt +1 -0
- data/examples/genes/mouse.txt +1 -0
- data/examples/genes/rat.txt +1 -0
- data/ext/Makefile +167 -0
- data/ext/Quartet.c +399 -0
- data/ext/Quartet.h +62 -0
- data/ext/TreeScore.c +244 -0
- data/ext/TreeScore.h +3 -0
- data/ext/config.h +65 -0
- data/ext/config.h.in +64 -0
- data/ext/extconf.rb +3 -0
- data/ext/lib/CompLearnLib/CLConfig.rb +241 -0
- data/ext/lib/CompLearnLib/CompressionObject.rb +59 -0
- data/ext/lib/CompLearnLib/CompressionTask.rb +99 -0
- data/ext/lib/CompLearnLib/DistMatrix.rb +18 -0
- data/ext/lib/CompLearnLib/FoundComp.rb +10 -0
- data/ext/lib/CompLearnLib/FoundComp.rb.in +10 -0
- data/ext/lib/CompLearnLib/Ncd.rb +248 -0
- data/ext/lib/CompLearnLib/RunEnv.rb +150 -0
- data/ext/lib/CompLearnLib/Task.rb +39 -0
- data/ext/lib/CompLearnLib/TaskMaster.rb +13 -0
- data/ext/lib/CompLearnLib/TaskMasterMPI.rb +112 -0
- data/ext/lib/CompLearnLib/TaskMasterSingle.rb +39 -0
- data/ext/lib/CompLearnLib/Tree.rb +300 -0
- data/install-sh +294 -0
- data/missing +336 -0
- data/mkinstalldirs +111 -0
- data/o +24 -0
- data/scripts/CompLearn.iss +89 -0
- data/scripts/CompLearn.iss.in +89 -0
- data/scripts/debian/changelog +6 -0
- data/scripts/debian/control +14 -0
- data/scripts/makeSetup.sh +23 -0
- data/scripts/makeSetup.sh.in +23 -0
- data/scripts/makedeb.zsh +46 -0
- data/scripts/makedeb.zsh.in +46 -0
- data/tests/alltests.rb +2 -0
- data/tests/bz2test.rb +516 -0
- data/tests/sshagent-test.rb +48 -0
- data/tests/tests.rb +275 -0
- metadata +164 -0
data/ext/TreeScore.c
ADDED
@@ -0,0 +1,244 @@
|
|
1
|
+
#include <assert.h>
|
2
|
+
#include "TreeScore.h"
|
3
|
+
#include "Quartet.h"
|
4
|
+
#include <malloc.h>
|
5
|
+
#include "ruby.h"
|
6
|
+
|
7
|
+
#ifndef VALUEFUNC
|
8
|
+
#define VALUEFUNC(x) RUBY_METHOD_FUNC(x)
|
9
|
+
#endif
|
10
|
+
|
11
|
+
static VALUE cTreeScore;
|
12
|
+
|
13
|
+
/*
|
14
|
+
* Holds a list of quartets with associated weights.
|
15
|
+
* Keeps track of worst and best possible scores using this list.
|
16
|
+
*/
|
17
|
+
struct TreeScore {
|
18
|
+
weight_t worst, best;
|
19
|
+
weight_t penalty;
|
20
|
+
struct QuartetList *ql;
|
21
|
+
};
|
22
|
+
|
23
|
+
/*
|
24
|
+
* Allocate a new TreeScore
|
25
|
+
*/
|
26
|
+
struct TreeScore *newTreeScore()
|
27
|
+
{
|
28
|
+
// m11.
|
29
|
+
struct TreeScore *result = calloc(sizeof(struct TreeScore), 1);
|
30
|
+
result->penalty = 0.0;
|
31
|
+
return result;
|
32
|
+
}
|
33
|
+
/*
 * Free callback registered with Data_Wrap_Struct: receives the wrapped
 * TreeScore as an untyped pointer and hands it to freeTreeScore().
 */
static void ts_free(void *vts)
{
  struct TreeScore *wrapped = vts;
  freeTreeScore(wrapped);
}
|
40
|
+
|
41
|
+
void freeTreeScore(struct TreeScore *ts)
|
42
|
+
{
|
43
|
+
// f11.
|
44
|
+
freeQuartetList(ts->ql);
|
45
|
+
ts->ql = 0;
|
46
|
+
free(ts);
|
47
|
+
}
|
48
|
+
|
49
|
+
|
50
|
+
/*
|
51
|
+
* Convert a Ruby Tree into a C/C++ FastTree pointer
|
52
|
+
*/
|
53
|
+
struct FastTree *convertTreeFromRuby(VALUE tree)
|
54
|
+
{
|
55
|
+
VALUE edges = rb_iv_get(tree, "@edges");
|
56
|
+
int translation[MAXNODES];
|
57
|
+
int nodeCount = RARRAY(edges)->len;
|
58
|
+
int speciesCount = (nodeCount + 2) / 2;
|
59
|
+
struct FastTree *ft = newFastTree(nodeCount);
|
60
|
+
int currentKernel=0, currentSpecies=0;
|
61
|
+
int i, j;
|
62
|
+
ft->spec = speciesCount;
|
63
|
+
for (i = 0; i < nodeCount; ++i) {
|
64
|
+
VALUE neighbors = rb_ary_entry(edges, i);
|
65
|
+
int ns = RARRAY(neighbors)->len;
|
66
|
+
switch (ns) {
|
67
|
+
case 1:
|
68
|
+
translation[i] = currentSpecies++;
|
69
|
+
break;
|
70
|
+
case 3:
|
71
|
+
translation[i] = speciesCount + currentKernel++;
|
72
|
+
break;
|
73
|
+
default:
|
74
|
+
assert("Bad tree!" && 0);
|
75
|
+
}
|
76
|
+
}
|
77
|
+
for (i = 0; i < nodeCount; ++i) {
|
78
|
+
VALUE neighbors = rb_ary_entry(edges, i);
|
79
|
+
int ns = RARRAY(neighbors)->len;
|
80
|
+
for (j = 0; j < ns; ++j) {
|
81
|
+
VALUE curn = rb_ary_entry(neighbors, j);
|
82
|
+
push_back(&ft->tree->nl[translation[i]], translation[NUM2INT(curn)]);
|
83
|
+
}
|
84
|
+
}
|
85
|
+
ft->spm = allShortestPathTrees(ft->tree);
|
86
|
+
return ft;
|
87
|
+
}
|
88
|
+
|
89
|
+
/*
|
90
|
+
* Convert a Ruby 2-dimensional array into a C/C++ DistMatrix
|
91
|
+
*/
|
92
|
+
struct DistMatrix *convertDistMatrixFromRuby(VALUE dm)
|
93
|
+
{
|
94
|
+
int size = RARRAY(dm)->len;
|
95
|
+
struct DistMatrix *rdm = newDistMatrix(size);
|
96
|
+
int i, j;
|
97
|
+
for (i = 0; i < size; ++i) {
|
98
|
+
for (j = 0; j < size; ++j) {
|
99
|
+
weight_t val = NUM2DBL(rb_ary_entry(rb_ary_entry(dm, i),j));
|
100
|
+
rdm->vals[i][j] = val;
|
101
|
+
}
|
102
|
+
}
|
103
|
+
return rdm;
|
104
|
+
}
|
105
|
+
|
106
|
+
/*
|
107
|
+
* Makes a list consisting of only the best quartets (1/3 as many as full)
|
108
|
+
*/
|
109
|
+
static VALUE ts_makeBest(VALUE cl, VALUE dm)
|
110
|
+
{
|
111
|
+
struct TreeScore *ts = newTreeScore();
|
112
|
+
VALUE tdata = Data_Wrap_Struct(cl, 0, ts_free, ts);
|
113
|
+
struct DistMatrix *gdm = convertDistMatrixFromRuby(dm);
|
114
|
+
ts->ql = makeBestQuartetList(gdm, &ts->worst, &ts->best);
|
115
|
+
freeDistMatrix(gdm);
|
116
|
+
return tdata;
|
117
|
+
}
|
118
|
+
|
119
|
+
/*
|
120
|
+
* Makes a list of every quartet with corresponding cost.
|
121
|
+
*/
|
122
|
+
static VALUE ts_makeFull(VALUE cl, VALUE dm)
|
123
|
+
{
|
124
|
+
struct TreeScore *ts = newTreeScore();
|
125
|
+
VALUE tdata = Data_Wrap_Struct(cl, 0, ts_free, ts);
|
126
|
+
struct DistMatrix *gdm = convertDistMatrixFromRuby(dm);
|
127
|
+
ts->ql = makeFullQuartetList(gdm, &ts->worst, &ts->best);
|
128
|
+
freeDistMatrix(gdm);
|
129
|
+
return tdata;
|
130
|
+
}
|
131
|
+
|
132
|
+
/*
|
133
|
+
* Return the worst possible tree cost total. This is used to calculate S(T).
|
134
|
+
*/
|
135
|
+
static VALUE ts_worst(VALUE self)
|
136
|
+
{
|
137
|
+
struct TreeScore *ts;
|
138
|
+
Data_Get_Struct(self, struct TreeScore, ts);
|
139
|
+
return rb_float_new(ts->worst);
|
140
|
+
}
|
141
|
+
|
142
|
+
/*
|
143
|
+
* Return the best possible tree cost total. This is used to calculate S(T).
|
144
|
+
*/
|
145
|
+
static VALUE ts_best(VALUE self)
|
146
|
+
{
|
147
|
+
struct TreeScore *ts;
|
148
|
+
Data_Get_Struct(self, struct TreeScore, ts);
|
149
|
+
return rb_float_new(ts->best);
|
150
|
+
}
|
151
|
+
|
152
|
+
/*
|
153
|
+
* Returns the number of neighbors who are leaves to a given node
|
154
|
+
*/
|
155
|
+
int countLeafNeighbors(const struct FastTree *ft, qbase_t which)
|
156
|
+
{
|
157
|
+
int nc = ft->tree->nl[which].size;
|
158
|
+
int i;
|
159
|
+
int leafCount = 0;
|
160
|
+
for (i = 0; i < nc; ++i)
|
161
|
+
if (ft->tree->nl[ft->tree->nl[which].n[i]].size == 1)
|
162
|
+
leafCount++;
|
163
|
+
return leafCount;
|
164
|
+
}
|
165
|
+
|
166
|
+
/*
|
167
|
+
* Returns the number of nodes that are unpaired
|
168
|
+
*/
|
169
|
+
int countBadNodes(const struct FastTree *ft)
|
170
|
+
{
|
171
|
+
int i;
|
172
|
+
int badCount = 0;
|
173
|
+
for (i = ft->spec; i < ft->tree->size; ++i)
|
174
|
+
if (countLeafNeighbors(ft, i) == 1)
|
175
|
+
badCount++;
|
176
|
+
return badCount;
|
177
|
+
}
|
178
|
+
|
179
|
+
/*
 * Ruby: TreeScore#penalty=(val).  Stores val, converted to a double, as the
 * penalty and returns the stored value as a Float.
 */
static VALUE ts_penaltyeq(VALUE self, VALUE val)
{
  struct TreeScore *scores;

  Data_Get_Struct(self, struct TreeScore, scores);
  scores->penalty = NUM2DBL(val);
  return rb_float_new(scores->penalty);
}
|
186
|
+
|
187
|
+
/*
 * Ruby: TreeScore#penalty.  Returns the current penalty as a Float.
 */
static VALUE ts_penalty(VALUE self)
{
  struct TreeScore *scores;

  Data_Get_Struct(self, struct TreeScore, scores);
  return rb_float_new(scores->penalty);
}
|
193
|
+
|
194
|
+
/*
|
195
|
+
* Return a tree's S(T) score, between 0.0 (worst) and 1.0 (best), (penalty may force it outside range)
|
196
|
+
*/
|
197
|
+
void printFastTree(struct FastTree *ft)
|
198
|
+
{
|
199
|
+
int i, j;
|
200
|
+
printf("Tree size: %d\n", ft->tree->size);
|
201
|
+
for (i = 0; i < ft->tree->size; ++i) {
|
202
|
+
const struct NodeList *cur = &ft->tree->nl[i];
|
203
|
+
printf("Node %d: %d neighbors, %d leaf-neighbors: ", i, ft->tree->nl[i].size, countLeafNeighbors(ft, i));
|
204
|
+
for (j = 0; j < cur->size; ++j) {
|
205
|
+
printf("%d->%d, ", i, cur->n[j]);
|
206
|
+
}
|
207
|
+
printf("\n");
|
208
|
+
}
|
209
|
+
printf("\n");
|
210
|
+
}
|
211
|
+
|
212
|
+
/*
 * Ruby: TreeScore#score(tree).
 *
 * Return a tree's S(T) score, between 0.0 (worst) and 1.0 (best), minus
 * penalty times the number of bad nodes (so the penalty may force it
 * outside that range).
 *
 * NOTE(review): when best == worst the division below has a zero
 * denominator and the result is not a finite score; this is unchanged from
 * the original behaviour.
 */
static VALUE ts_score(VALUE self, VALUE tree)
{
  int badNodeCount;
  struct TreeScore *ts;
  struct FastTree *ft;
  weight_t score;
  double result, radj;

  /* Fetch the struct first so no FastTree exists yet if this raises. */
  Data_Get_Struct(self, struct TreeScore, ts);
  ft = convertTreeFromRuby(tree);
  score = calculateWeightedScore(ft, ts->ql);
  badNodeCount = countBadNodes(ft);   /* walk the tree once, reuse below */
  freeFastTree(ft);

  /* Computed in double: going through float would discard precision
   * before the value is handed back to Ruby as a Float. */
  result = (score - ts->worst) / (ts->best - ts->worst);
  radj = -ts->penalty * badNodeCount;
  return rb_float_new(result + radj);
}
|
227
|
+
|
228
|
+
void initTreeScore()
|
229
|
+
{
|
230
|
+
cTreeScore = rb_define_class("TreeScore", rb_cObject);
|
231
|
+
rb_define_singleton_method(cTreeScore, "makeBestList", VALUEFUNC(ts_makeBest), 1);
|
232
|
+
rb_define_singleton_method(cTreeScore, "makeFullList", VALUEFUNC(ts_makeFull), 1);
|
233
|
+
rb_define_method(cTreeScore, "penalty", VALUEFUNC(ts_penalty), 0);
|
234
|
+
rb_define_method(cTreeScore, "penalty=", VALUEFUNC(ts_penaltyeq), 1);
|
235
|
+
rb_define_method(cTreeScore, "score", VALUEFUNC(ts_score), 1);
|
236
|
+
rb_define_method(cTreeScore, "worst", VALUEFUNC(ts_worst), 0);
|
237
|
+
rb_define_method(cTreeScore, "best", VALUEFUNC(ts_best), 0);
|
238
|
+
}
|
239
|
+
|
240
|
+
/*
 * Entry point named after Ruby's Init_<library> convention; it only
 * registers the TreeScore class.
 */
void Init_CompLearn()
{
  initTreeScore();
}
|
244
|
+
|
data/ext/TreeScore.h
ADDED
data/ext/config.h
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
/* ext/config.h. Generated by configure. */
|
2
|
+
/* ext/config.h.in. Generated from configure.ac by autoheader. */
|
3
|
+
|
4
|
+
/* Define to 1 if you have the <inttypes.h> header file. */
|
5
|
+
#define HAVE_INTTYPES_H 1
|
6
|
+
|
7
|
+
/* Define to 1 if you have the <memory.h> header file. */
|
8
|
+
#define HAVE_MEMORY_H 1
|
9
|
+
|
10
|
+
/* Define to 1 if stdbool.h conforms to C99. */
|
11
|
+
/* #undef HAVE_STDBOOL_H */
|
12
|
+
|
13
|
+
/* Define to 1 if you have the <stdint.h> header file. */
|
14
|
+
#define HAVE_STDINT_H 1
|
15
|
+
|
16
|
+
/* Define to 1 if you have the <stdlib.h> header file. */
|
17
|
+
#define HAVE_STDLIB_H 1
|
18
|
+
|
19
|
+
/* Define to 1 if you have the <strings.h> header file. */
|
20
|
+
#define HAVE_STRINGS_H 1
|
21
|
+
|
22
|
+
/* Define to 1 if you have the <string.h> header file. */
|
23
|
+
#define HAVE_STRING_H 1
|
24
|
+
|
25
|
+
/* Define to 1 if you have the <sys/stat.h> header file. */
|
26
|
+
#define HAVE_SYS_STAT_H 1
|
27
|
+
|
28
|
+
/* Define to 1 if you have the <sys/time.h> header file. */
|
29
|
+
#define HAVE_SYS_TIME_H 1
|
30
|
+
|
31
|
+
/* Define to 1 if you have the <sys/types.h> header file. */
|
32
|
+
#define HAVE_SYS_TYPES_H 1
|
33
|
+
|
34
|
+
/* Define to 1 if you have the <unistd.h> header file. */
|
35
|
+
#define HAVE_UNISTD_H 1
|
36
|
+
|
37
|
+
/* Define to 1 if the system has the type `_Bool'. */
|
38
|
+
/* #undef HAVE__BOOL */
|
39
|
+
|
40
|
+
/* Name of package */
|
41
|
+
#define PACKAGE "complearn"
|
42
|
+
|
43
|
+
/* Define to the address where bug reports for this package should be sent. */
|
44
|
+
#define PACKAGE_BUGREPORT ""
|
45
|
+
|
46
|
+
/* Define to the full name of this package. */
|
47
|
+
#define PACKAGE_NAME ""
|
48
|
+
|
49
|
+
/* Define to the full name and version of this package. */
|
50
|
+
#define PACKAGE_STRING ""
|
51
|
+
|
52
|
+
/* Define to the one symbol short name of this package. */
|
53
|
+
#define PACKAGE_TARNAME ""
|
54
|
+
|
55
|
+
/* Define to the version of this package. */
|
56
|
+
#define PACKAGE_VERSION ""
|
57
|
+
|
58
|
+
/* Define to 1 if you have the ANSI C header files. */
|
59
|
+
#define STDC_HEADERS 1
|
60
|
+
|
61
|
+
/* Version number of package */
|
62
|
+
#define VERSION "0.6.2"
|
63
|
+
|
64
|
+
/* Define to empty if `const' does not conform to ANSI C. */
|
65
|
+
/* #undef const */
|
data/ext/config.h.in
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
/* ext/config.h.in. Generated from configure.ac by autoheader. */
|
2
|
+
|
3
|
+
/* Define to 1 if you have the <inttypes.h> header file. */
|
4
|
+
#undef HAVE_INTTYPES_H
|
5
|
+
|
6
|
+
/* Define to 1 if you have the <memory.h> header file. */
|
7
|
+
#undef HAVE_MEMORY_H
|
8
|
+
|
9
|
+
/* Define to 1 if stdbool.h conforms to C99. */
|
10
|
+
#undef HAVE_STDBOOL_H
|
11
|
+
|
12
|
+
/* Define to 1 if you have the <stdint.h> header file. */
|
13
|
+
#undef HAVE_STDINT_H
|
14
|
+
|
15
|
+
/* Define to 1 if you have the <stdlib.h> header file. */
|
16
|
+
#undef HAVE_STDLIB_H
|
17
|
+
|
18
|
+
/* Define to 1 if you have the <strings.h> header file. */
|
19
|
+
#undef HAVE_STRINGS_H
|
20
|
+
|
21
|
+
/* Define to 1 if you have the <string.h> header file. */
|
22
|
+
#undef HAVE_STRING_H
|
23
|
+
|
24
|
+
/* Define to 1 if you have the <sys/stat.h> header file. */
|
25
|
+
#undef HAVE_SYS_STAT_H
|
26
|
+
|
27
|
+
/* Define to 1 if you have the <sys/time.h> header file. */
|
28
|
+
#undef HAVE_SYS_TIME_H
|
29
|
+
|
30
|
+
/* Define to 1 if you have the <sys/types.h> header file. */
|
31
|
+
#undef HAVE_SYS_TYPES_H
|
32
|
+
|
33
|
+
/* Define to 1 if you have the <unistd.h> header file. */
|
34
|
+
#undef HAVE_UNISTD_H
|
35
|
+
|
36
|
+
/* Define to 1 if the system has the type `_Bool'. */
|
37
|
+
#undef HAVE__BOOL
|
38
|
+
|
39
|
+
/* Name of package */
|
40
|
+
#undef PACKAGE
|
41
|
+
|
42
|
+
/* Define to the address where bug reports for this package should be sent. */
|
43
|
+
#undef PACKAGE_BUGREPORT
|
44
|
+
|
45
|
+
/* Define to the full name of this package. */
|
46
|
+
#undef PACKAGE_NAME
|
47
|
+
|
48
|
+
/* Define to the full name and version of this package. */
|
49
|
+
#undef PACKAGE_STRING
|
50
|
+
|
51
|
+
/* Define to the one symbol short name of this package. */
|
52
|
+
#undef PACKAGE_TARNAME
|
53
|
+
|
54
|
+
/* Define to the version of this package. */
|
55
|
+
#undef PACKAGE_VERSION
|
56
|
+
|
57
|
+
/* Define to 1 if you have the ANSI C header files. */
|
58
|
+
#undef STDC_HEADERS
|
59
|
+
|
60
|
+
/* Version number of package */
|
61
|
+
#undef VERSION
|
62
|
+
|
63
|
+
/* Define to empty if `const' does not conform to ANSI C. */
|
64
|
+
#undef const
|
data/ext/extconf.rb
ADDED
@@ -0,0 +1,241 @@
|
|
1
|
+
#
|
2
|
+
# CLConfig
|
3
|
+
#
|
4
|
+
# Reads a given configuration file of the following form:
|
5
|
+
#
|
6
|
+
# Compressor: [BZ2,..]
|
7
|
+
# InputDir: [input directory]
|
8
|
+
# OutputDir: [output directory]
|
9
|
+
# WorkDir: [working directory]
|
10
|
+
# Symmetric: [yes/no]
|
11
|
+
# Hosts: [list of hosts, separated by commas]
|
12
|
+
#
|
13
|
+
# Comments in the configuration file are prefixed with #
|
14
|
+
# individual values are accessed by configObj.[value]
|
15
|
+
# Note: this will exit if an error is encountered
|
16
|
+
#
|
17
|
+
# $Id: CLConfig.rb,v 1.5 2003/11/27 16:13:46 cilibrar Exp $
|
18
|
+
#
|
19
|
+
|
20
|
+
require 'yaml'
|
21
|
+
require 'CompLearnLib/FoundComp.rb'
|
22
|
+
|
23
|
+
class CLConfig

  # The allowed config variables

  @@BASECONFIGVARS =
    [ # prettyName            isArray Type       Default Value
      [ 'Compressor' ,        false, String,    FoundComp.defaultCompressor() ],
      [ 'CompressorCommand' , false, String,    'gzip -c -' ],
      [ 'InputDir' ,          false, String,    'in' ],
      [ 'OutputDir' ,         false, String,    'out' ],
      [ 'WorkDir' ,           false, String,    'work' ],
      [ 'Symmetric' ,         false, TrueClass, true ],
      [ 'Hosts' ,             true,  String,    ['localhost'] ],
      [ 'SingleProcess' ,     false, TrueClass, true ],
      # maketree
      [ 'UnpairedPenalty',    false, Float,     0.0 ],
      [ 'UseBestThirdOnly',   false, TrueClass, false ],
      [ 'MaxFailedTries',     false, Integer,   100 ],
      [ 'TreesPerTry',        false, Integer,   1000 ],
      [ 'InternalNodePrefix', false, String,    'n' ],
    ]

  # Truthy once setSingleUser has been called, nil otherwise.
  def self.singleUser?()
    defined?(@@SINGLEUSER)
  end

  # Switch the whole process to single-user mode (see #hosts).
  def self.setSingleUser()
    @@SINGLEUSER = true
  end

  # Install cfg as the config object returned by getDefaultConfig.
  def self.setDefaultConfig(cfg)
    @@DEFCON = cfg
  end

  # A class-method (since self is in class/module-scope, it refers to
  # the Class object CLConfig).  This one just tries to find a configuration
  # file in a default spot, either in $HOME or /etc, and falls back to the
  # built-in defaults when neither file exists.
  # Also, this method caches the first read config object as a Singleton.
  def self.getDefaultConfig
    unless defined?(@@DEFCON)
      homedir = ENV['HOME'] || '/home'
      filename = 'complearnrc'
      goodcon = nil
      [ "#{homedir}/.#{filename}", "/etc/#{filename}" ].each { |pathname|
        next unless File.exist?(pathname)
        goodcon = CLConfig.new(pathname)
        break if goodcon
      }
      goodcon = CLConfig.new(nil) unless goodcon
      @@DEFCON = goodcon
    end
    @@DEFCON
  end

  # Maps the lower-cased option name to
  # [ isArray, type, default, prettyName, methodName ] and, while building
  # that table, defines one reader per option (camelCase with a lower-case
  # first letter) plus an is<PrettyName>? predicate for boolean options.
  @@CONFIGVARS = { }
  @@BASECONFIGVARS.each { | prettyName, isArray, typ, defval |
    lowerName = prettyName.downcase
    methodName = prettyName[0..0].downcase + prettyName[1..-1]
    # accessing methods
    if methodName == 'hosts'
      # Only the first host is reported when -s is on the command line or
      # single-user mode has been set.
      def hosts()
        (ARGV.include?('-s') || CLConfig.singleUser?) ? [@hosts[0]] : @hosts
      end
    else
      module_eval "def #{methodName}() @#{methodName} end"
    end
    module_eval "def is#{prettyName}?() #{methodName} end" if typ == TrueClass
    @@CONFIGVARS[lowerName] = [ isArray, typ, defval, prettyName, methodName ]
  }

  #
  # constructor, read file
  #
  # Every option starts at its default; when filename is given the values
  # found in that file are applied on top.
  #
  def initialize(filename)
    @@CONFIGVARS.each_value { |a|
      methodName, defVal = a[4], a[2]
      instance_variable_set("@#{methodName}", defVal)
    }
    if filename
      @filename = filename
      readCLConfig()
    end
  end

  # Write a YAML file at fname holding every option with its default value.
  def self.writeDefaultConfigFile(fname)
    cfgmap = { }
    @@CONFIGVARS.each { |lowername, stuff|
      isArray, typ, defval, prettyName, methodName = stuff
      cfgmap[prettyName] = defval
    }
    File.open(fname, "w") { |f|
      f.write(cfgmap.to_yaml)
    }
  end

  #
  # read in the configuration file
  #
  # Prints a message and exits with status 1 when the file cannot be read
  # or parsed; raises on an option name that is not in the table above.
  #
  def readCLConfig()
    begin
      # Block form so the handle is closed even when read/parse raises.
      yamlcfg = YAML::parse(File.open(@filename, "r") { |f| f.read })
    rescue
      # Interpolate: String + Exception needs an implicit conversion that
      # not every Ruby provides.
      print "Unable to read config file #{@filename}: #{$!}"
      print "\n"
      exit(1)
    end

    # An empty file yields no document; keep the defaults in that case.
    return unless yamlcfg

    #fail "@filename must be a map not a #{yamlcfg.type_id}" unless yamlcfg.type_id.to_s == 'map'

    # NOTE(review): this walk expects the parsed map's #value to yield
    # (key string, [key node, value node]) pairs -- confirm against the
    # YAML engine of the Ruby in use.
    yamlcfg.value.each { |itagname, whatnot|
      tagname, value = itagname.downcase, whatnot[1].value
      if @@CONFIGVARS.has_key?(tagname)
        isArray, typ, defval, pretty, methodName = @@CONFIGVARS[tagname]
        if isArray
          result = value.map { |i| parseValue(typ, i.value) }
        else
          result = parseValue(typ, value)
        end
        instance_variable_set("@#{methodName}", result)
      else
        fail "Unknown configuration option: #{itagname}"
      end
    }
  end

  # Convert the raw string val into the option type typ (TrueClass, String,
  # Float or Integer).  Boolean options are true when the text contains
  # "true", "yes" or "on" (case-insensitive) and false otherwise.
  # Prints a message and exits with status 1 for any other type.
  def parseValue(typ, val)
    val = val.clone
    # remove whitespace
    val.gsub!(/^\s+/,'')
    val.gsub!(/\s+$/,'')
    case typ.to_s # to_s necessary because === checks is_a? for Class

    when 'TrueClass'
      # !! so parsed values are true/false like the defaults, not a match
      # position or nil.
      return !!(val =~ /true/i || val =~ /yes/i || val =~ /on/i)

    when 'String'
      return val

    when 'Float'
      return val.to_f

    when 'Integer'
      return val.to_i

    else
      puts "Illegal type: #{typ} for value #{val}"
      exit(1)

    end

  end

  # Searches for a file or directory specified by fname
  # if fname is relative, it will search for it in the following
  # order:
  # inputDir specified in this CLConfig object
  # current working directory
  #
  # If found, a string is returned with the absolute (full) pathname.
  # If not, an exception is raised
  def findInputFile(fname)
    raise "fname can not be nil" if fname == nil
    maybe = File.expand_path(fname, self.inputDir)
    return maybe if File.exist?(maybe)
    maybe = File.expand_path(fname)
    return maybe if File.exist?(maybe)
    raise "Cannot find file to open: #{fname}"
  end

  # Read a tagged file list.  Each useful line is
  #   <tag number> <one-letter type> <file name>
  # Comment lines (first non-blank character is #) and blank lines are
  # skipped, as are lines that do not match that shape.  Type p goes to the
  # testing list, f to the features list, anything else to the training
  # list.  Returns [ training, features, testing ]; training and testing
  # hold [ tag number, full path ] pairs, features holds full paths.
  def readTaggedFileList(fname)
    training = [ ]
    features = [ ]
    testing = [ ]
    File.open(fname, 'r') { |f|
      while line = f.gets
        line.chomp!
        next if line =~ /^\s*#/
        next unless line =~ /[\S]/
        if line =~ /^\s*(\S+)\s+(\S)\s+([^\s#]+)/
          # entryName, not fname: keep the parameter intact.
          tagnum, tagtype, entryName = $1.to_i, $2.downcase, $3
          realfname = findInputFile(entryName)
          if tagtype == 'p'
            testing << [ tagnum, realfname ]
          elsif tagtype == 'f'
            features << realfname
          else # tagtype == 'g'
            training << [ tagnum, realfname ]
          end
        end
      end
    }
    return [ training, features, testing ]
  end

  # If objname is a directory, return the sorted paths of the plain files
  # directly inside it; otherwise treat objname as a file list and return
  # what readFileList finds in it.
  def getFilelistFromDirOrFile(objname)
    files = [ ]
    if File.ftype(objname) == 'directory'
      # Dir.foreach closes the directory handle when it is done.
      Dir.foreach(objname) { |f|
        goodf = "#{objname}/#{f}"
        files << goodf if File.ftype(goodf) == 'file'
      }
      files.sort!
    else
      files = readFileList(objname)
    end
    files
  end

  # Read a plain file list: one file name per line, skipping blank lines
  # and lines starting with #.  Returns the full path of every entry as
  # resolved by findInputFile.
  def readFileList(fname)
    result = [ ]
    File.open(fname, 'r') { |f|
      while line = f.gets
        line.chomp!
        next if line =~ /^#/
        next unless line =~ /[\S]/
        realfname = findInputFile(line)
        result << realfname
      end
    }
    result
  end

  # Print the CompLearn version and exit with status 0.
  def self.printVersionAndExit()
    puts "CompLearn #{FoundComp::VERSION}"
    exit(0)
  end
end
|