complearn 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. data/AUTHORS +13 -0
  2. data/COPYING +340 -0
  3. data/ChangeLog +0 -0
  4. data/INSTALL +231 -0
  5. data/Makefile +352 -0
  6. data/Makefile.am +76 -0
  7. data/Makefile.in +352 -0
  8. data/NEWS +7 -0
  9. data/README +0 -0
  10. data/aclocal.m4 +104 -0
  11. data/bin/Makefile +209 -0
  12. data/bin/Makefile.am +8 -0
  13. data/bin/Makefile.in +209 -0
  14. data/bin/labeltree +68 -0
  15. data/bin/labeltree.in +68 -0
  16. data/bin/makesvm +70 -0
  17. data/bin/makesvm.in +70 -0
  18. data/bin/maketree +98 -0
  19. data/bin/maketree.in +98 -0
  20. data/bin/ncd +43 -0
  21. data/bin/ncd.in +43 -0
  22. data/bin/ncdmatrix +54 -0
  23. data/bin/ncdmatrix.in +54 -0
  24. data/bin/ncdvector +50 -0
  25. data/bin/ncdvector.in +50 -0
  26. data/complearn-0.6.2.gem +0 -0
  27. data/complearn.gemspec +57 -0
  28. data/config.log +597 -0
  29. data/config.status +1082 -0
  30. data/configure +4922 -0
  31. data/configure.ac +91 -0
  32. data/confstat5FpLBf/config.h +65 -0
  33. data/confstat5FpLBf/subs-1.sed +50 -0
  34. data/confstat5FpLBf/subs-2.sed +13 -0
  35. data/confstat5FpLBf/subs.frag +0 -0
  36. data/confstat5FpLBf/subs.sed +59 -0
  37. data/confstat5FpLBf/undefs.sed +24 -0
  38. data/doc/FAQ.txt +67 -0
  39. data/doc/Makefile +286 -0
  40. data/doc/Makefile.am +11 -0
  41. data/doc/Makefile.in +286 -0
  42. data/doc/devguide.txt +15 -0
  43. data/doc/example.complearnrc +14 -0
  44. data/doc/examples.txt +35 -0
  45. data/doc/man/Makefile +255 -0
  46. data/doc/man/Makefile.am +11 -0
  47. data/doc/man/Makefile.in +255 -0
  48. data/doc/man/complearn.5 +91 -0
  49. data/doc/man/labeltree.1 +35 -0
  50. data/doc/man/makesvm.1 +60 -0
  51. data/doc/man/maketree.1 +58 -0
  52. data/doc/man/ncd.1 +51 -0
  53. data/doc/man/ncdmatrix.1 +40 -0
  54. data/doc/man/ncdvector.1 +42 -0
  55. data/doc/readme.txt +101 -0
  56. data/doc/userguide.txt +46 -0
  57. data/examples/genes/blueWhale.txt +1 -0
  58. data/examples/genes/cat.txt +1 -0
  59. data/examples/genes/chimpanzee.txt +1 -0
  60. data/examples/genes/finWhale.txt +1 -0
  61. data/examples/genes/graySeal.txt +1 -0
  62. data/examples/genes/harborSeal.txt +1 -0
  63. data/examples/genes/horse.txt +1 -0
  64. data/examples/genes/human.txt +1 -0
  65. data/examples/genes/mouse.txt +1 -0
  66. data/examples/genes/rat.txt +1 -0
  67. data/ext/Makefile +167 -0
  68. data/ext/Quartet.c +399 -0
  69. data/ext/Quartet.h +62 -0
  70. data/ext/TreeScore.c +244 -0
  71. data/ext/TreeScore.h +3 -0
  72. data/ext/config.h +65 -0
  73. data/ext/config.h.in +64 -0
  74. data/ext/extconf.rb +3 -0
  75. data/ext/lib/CompLearnLib/CLConfig.rb +241 -0
  76. data/ext/lib/CompLearnLib/CompressionObject.rb +59 -0
  77. data/ext/lib/CompLearnLib/CompressionTask.rb +99 -0
  78. data/ext/lib/CompLearnLib/DistMatrix.rb +18 -0
  79. data/ext/lib/CompLearnLib/FoundComp.rb +10 -0
  80. data/ext/lib/CompLearnLib/FoundComp.rb.in +10 -0
  81. data/ext/lib/CompLearnLib/Ncd.rb +248 -0
  82. data/ext/lib/CompLearnLib/RunEnv.rb +150 -0
  83. data/ext/lib/CompLearnLib/Task.rb +39 -0
  84. data/ext/lib/CompLearnLib/TaskMaster.rb +13 -0
  85. data/ext/lib/CompLearnLib/TaskMasterMPI.rb +112 -0
  86. data/ext/lib/CompLearnLib/TaskMasterSingle.rb +39 -0
  87. data/ext/lib/CompLearnLib/Tree.rb +300 -0
  88. data/install-sh +294 -0
  89. data/missing +336 -0
  90. data/mkinstalldirs +111 -0
  91. data/o +24 -0
  92. data/scripts/CompLearn.iss +89 -0
  93. data/scripts/CompLearn.iss.in +89 -0
  94. data/scripts/debian/changelog +6 -0
  95. data/scripts/debian/control +14 -0
  96. data/scripts/makeSetup.sh +23 -0
  97. data/scripts/makeSetup.sh.in +23 -0
  98. data/scripts/makedeb.zsh +46 -0
  99. data/scripts/makedeb.zsh.in +46 -0
  100. data/tests/alltests.rb +2 -0
  101. data/tests/bz2test.rb +516 -0
  102. data/tests/sshagent-test.rb +48 -0
  103. data/tests/tests.rb +275 -0
  104. metadata +164 -0
@@ -0,0 +1,244 @@
1
+ #include <assert.h>
2
+ #include "TreeScore.h"
3
+ #include "Quartet.h"
4
+ #include <malloc.h>
5
+ #include "ruby.h"
6
+
7
+ #ifndef VALUEFUNC
8
+ #define VALUEFUNC(x) RUBY_METHOD_FUNC(x)
9
+ #endif
10
+
11
+ static VALUE cTreeScore;
12
+
13
+ /*
14
+ * Holds a list of quartets with associated weights.
15
+ * Keeps track of worst and best possible scores using this list.
16
+ */
17
+ struct TreeScore {
18
+ weight_t worst, best;
19
+ weight_t penalty;
20
+ struct QuartetList *ql;
21
+ };
22
+
23
+ /*
24
+ * Allocate a new TreeScore
25
+ */
26
+ struct TreeScore *newTreeScore()
27
+ {
28
+ // m11.
29
+ struct TreeScore *result = calloc(sizeof(struct TreeScore), 1);
30
+ result->penalty = 0.0;
31
+ return result;
32
+ }
33
+ /*
34
+ * Free a C/C++ TreeScore object
35
+ */
36
+ static void ts_free(void *vts)
37
+ {
38
+ freeTreeScore((struct TreeScore *) vts);
39
+ }
40
+
41
+ void freeTreeScore(struct TreeScore *ts)
42
+ {
43
+ // f11.
44
+ freeQuartetList(ts->ql);
45
+ ts->ql = 0;
46
+ free(ts);
47
+ }
48
+
49
+
50
+ /*
51
+ * Convert a Ruby Tree into a C/C++ FastTree pointer
52
+ */
53
+ struct FastTree *convertTreeFromRuby(VALUE tree)
54
+ {
55
+ VALUE edges = rb_iv_get(tree, "@edges");
56
+ int translation[MAXNODES];
57
+ int nodeCount = RARRAY(edges)->len;
58
+ int speciesCount = (nodeCount + 2) / 2;
59
+ struct FastTree *ft = newFastTree(nodeCount);
60
+ int currentKernel=0, currentSpecies=0;
61
+ int i, j;
62
+ ft->spec = speciesCount;
63
+ for (i = 0; i < nodeCount; ++i) {
64
+ VALUE neighbors = rb_ary_entry(edges, i);
65
+ int ns = RARRAY(neighbors)->len;
66
+ switch (ns) {
67
+ case 1:
68
+ translation[i] = currentSpecies++;
69
+ break;
70
+ case 3:
71
+ translation[i] = speciesCount + currentKernel++;
72
+ break;
73
+ default:
74
+ assert("Bad tree!" && 0);
75
+ }
76
+ }
77
+ for (i = 0; i < nodeCount; ++i) {
78
+ VALUE neighbors = rb_ary_entry(edges, i);
79
+ int ns = RARRAY(neighbors)->len;
80
+ for (j = 0; j < ns; ++j) {
81
+ VALUE curn = rb_ary_entry(neighbors, j);
82
+ push_back(&ft->tree->nl[translation[i]], translation[NUM2INT(curn)]);
83
+ }
84
+ }
85
+ ft->spm = allShortestPathTrees(ft->tree);
86
+ return ft;
87
+ }
88
+
89
+ /*
90
+ * Convert a Ruby 2-dimensional array into a C/C++ DistMatrix
91
+ */
92
+ struct DistMatrix *convertDistMatrixFromRuby(VALUE dm)
93
+ {
94
+ int size = RARRAY(dm)->len;
95
+ struct DistMatrix *rdm = newDistMatrix(size);
96
+ int i, j;
97
+ for (i = 0; i < size; ++i) {
98
+ for (j = 0; j < size; ++j) {
99
+ weight_t val = NUM2DBL(rb_ary_entry(rb_ary_entry(dm, i),j));
100
+ rdm->vals[i][j] = val;
101
+ }
102
+ }
103
+ return rdm;
104
+ }
105
+
106
+ /*
107
+ * Makes a list consisting of only the best quartets (1/3 as many as full)
108
+ */
109
+ static VALUE ts_makeBest(VALUE cl, VALUE dm)
110
+ {
111
+ struct TreeScore *ts = newTreeScore();
112
+ VALUE tdata = Data_Wrap_Struct(cl, 0, ts_free, ts);
113
+ struct DistMatrix *gdm = convertDistMatrixFromRuby(dm);
114
+ ts->ql = makeBestQuartetList(gdm, &ts->worst, &ts->best);
115
+ freeDistMatrix(gdm);
116
+ return tdata;
117
+ }
118
+
119
+ /*
120
+ * Makes a list of every quartet with corresponding cost.
121
+ */
122
+ static VALUE ts_makeFull(VALUE cl, VALUE dm)
123
+ {
124
+ struct TreeScore *ts = newTreeScore();
125
+ VALUE tdata = Data_Wrap_Struct(cl, 0, ts_free, ts);
126
+ struct DistMatrix *gdm = convertDistMatrixFromRuby(dm);
127
+ ts->ql = makeFullQuartetList(gdm, &ts->worst, &ts->best);
128
+ freeDistMatrix(gdm);
129
+ return tdata;
130
+ }
131
+
132
+ /*
133
+ * Return the worst possible tree cost total. This is used to calculate S(T).
134
+ */
135
+ static VALUE ts_worst(VALUE self)
136
+ {
137
+ struct TreeScore *ts;
138
+ Data_Get_Struct(self, struct TreeScore, ts);
139
+ return rb_float_new(ts->worst);
140
+ }
141
+
142
+ /*
143
+ * Return the best possible tree cost total. This is used to calculate S(T).
144
+ */
145
+ static VALUE ts_best(VALUE self)
146
+ {
147
+ struct TreeScore *ts;
148
+ Data_Get_Struct(self, struct TreeScore, ts);
149
+ return rb_float_new(ts->best);
150
+ }
151
+
152
+ /*
153
+ * Returns the number of neighbors who are leaves to a given node
154
+ */
155
+ int countLeafNeighbors(const struct FastTree *ft, qbase_t which)
156
+ {
157
+ int nc = ft->tree->nl[which].size;
158
+ int i;
159
+ int leafCount = 0;
160
+ for (i = 0; i < nc; ++i)
161
+ if (ft->tree->nl[ft->tree->nl[which].n[i]].size == 1)
162
+ leafCount++;
163
+ return leafCount;
164
+ }
165
+
166
+ /*
167
+ * Returns the number of nodes that are unpaired
168
+ */
169
+ int countBadNodes(const struct FastTree *ft)
170
+ {
171
+ int i;
172
+ int badCount = 0;
173
+ for (i = ft->spec; i < ft->tree->size; ++i)
174
+ if (countLeafNeighbors(ft, i) == 1)
175
+ badCount++;
176
+ return badCount;
177
+ }
178
+
179
+ static VALUE ts_penaltyeq(VALUE self, VALUE val)
180
+ {
181
+ struct TreeScore *ts;
182
+ Data_Get_Struct(self, struct TreeScore, ts);
183
+ ts->penalty = NUM2DBL(val);
184
+ return rb_float_new(ts->penalty);
185
+ }
186
+
187
+ static VALUE ts_penalty(VALUE self)
188
+ {
189
+ struct TreeScore *ts;
190
+ Data_Get_Struct(self, struct TreeScore, ts);
191
+ return rb_float_new(ts->penalty);
192
+ }
193
+
194
+ /*
195
+ * Return a tree's S(T) score, between 0.0 (worst) and 1.0 (best), (penalty may force it outside range)
196
+ */
197
+ void printFastTree(struct FastTree *ft)
198
+ {
199
+ int i, j;
200
+ printf("Tree size: %d\n", ft->tree->size);
201
+ for (i = 0; i < ft->tree->size; ++i) {
202
+ const struct NodeList *cur = &ft->tree->nl[i];
203
+ printf("Node %d: %d neighbors, %d leaf-neighbors: ", i, ft->tree->nl[i].size, countLeafNeighbors(ft, i));
204
+ for (j = 0; j < cur->size; ++j) {
205
+ printf("%d->%d, ", i, cur->n[j]);
206
+ }
207
+ printf("\n");
208
+ }
209
+ printf("\n");
210
+ }
211
+
212
+ static VALUE ts_score(VALUE self, VALUE tree)
213
+ {
214
+ int badNodeCount;
215
+ struct TreeScore *ts;
216
+ struct FastTree *ft = convertTreeFromRuby(tree);
217
+ weight_t score;
218
+ float result, radj;
219
+ Data_Get_Struct(self, struct TreeScore, ts);
220
+ score = calculateWeightedScore(ft, ts->ql);
221
+ result = (score - ts->worst) / (ts->best - ts->worst);
222
+ badNodeCount = countBadNodes(ft);
223
+ radj = - ts->penalty * countBadNodes(ft);
224
+ freeFastTree(ft);
225
+ return rb_float_new(result + radj);
226
+ }
227
+
228
+ void initTreeScore()
229
+ {
230
+ cTreeScore = rb_define_class("TreeScore", rb_cObject);
231
+ rb_define_singleton_method(cTreeScore, "makeBestList", VALUEFUNC(ts_makeBest), 1);
232
+ rb_define_singleton_method(cTreeScore, "makeFullList", VALUEFUNC(ts_makeFull), 1);
233
+ rb_define_method(cTreeScore, "penalty", VALUEFUNC(ts_penalty), 0);
234
+ rb_define_method(cTreeScore, "penalty=", VALUEFUNC(ts_penaltyeq), 1);
235
+ rb_define_method(cTreeScore, "score", VALUEFUNC(ts_score), 1);
236
+ rb_define_method(cTreeScore, "worst", VALUEFUNC(ts_worst), 0);
237
+ rb_define_method(cTreeScore, "best", VALUEFUNC(ts_best), 0);
238
+ }
239
+
240
+ void Init_CompLearn()
241
+ {
242
+ initTreeScore();
243
+ }
244
+
@@ -0,0 +1,3 @@
1
+ struct TreeScore;
2
+ void initTreeScore();
3
+ void freeTreeScore(struct TreeScore *ts);
@@ -0,0 +1,65 @@
1
+ /* ext/config.h. Generated by configure. */
2
+ /* ext/config.h.in. Generated from configure.ac by autoheader. */
3
+
4
+ /* Define to 1 if you have the <inttypes.h> header file. */
5
+ #define HAVE_INTTYPES_H 1
6
+
7
+ /* Define to 1 if you have the <memory.h> header file. */
8
+ #define HAVE_MEMORY_H 1
9
+
10
+ /* Define to 1 if stdbool.h conforms to C99. */
11
+ /* #undef HAVE_STDBOOL_H */
12
+
13
+ /* Define to 1 if you have the <stdint.h> header file. */
14
+ #define HAVE_STDINT_H 1
15
+
16
+ /* Define to 1 if you have the <stdlib.h> header file. */
17
+ #define HAVE_STDLIB_H 1
18
+
19
+ /* Define to 1 if you have the <strings.h> header file. */
20
+ #define HAVE_STRINGS_H 1
21
+
22
+ /* Define to 1 if you have the <string.h> header file. */
23
+ #define HAVE_STRING_H 1
24
+
25
+ /* Define to 1 if you have the <sys/stat.h> header file. */
26
+ #define HAVE_SYS_STAT_H 1
27
+
28
+ /* Define to 1 if you have the <sys/time.h> header file. */
29
+ #define HAVE_SYS_TIME_H 1
30
+
31
+ /* Define to 1 if you have the <sys/types.h> header file. */
32
+ #define HAVE_SYS_TYPES_H 1
33
+
34
+ /* Define to 1 if you have the <unistd.h> header file. */
35
+ #define HAVE_UNISTD_H 1
36
+
37
+ /* Define to 1 if the system has the type `_Bool'. */
38
+ /* #undef HAVE__BOOL */
39
+
40
+ /* Name of package */
41
+ #define PACKAGE "complearn"
42
+
43
+ /* Define to the address where bug reports for this package should be sent. */
44
+ #define PACKAGE_BUGREPORT ""
45
+
46
+ /* Define to the full name of this package. */
47
+ #define PACKAGE_NAME ""
48
+
49
+ /* Define to the full name and version of this package. */
50
+ #define PACKAGE_STRING ""
51
+
52
+ /* Define to the one symbol short name of this package. */
53
+ #define PACKAGE_TARNAME ""
54
+
55
+ /* Define to the version of this package. */
56
+ #define PACKAGE_VERSION ""
57
+
58
+ /* Define to 1 if you have the ANSI C header files. */
59
+ #define STDC_HEADERS 1
60
+
61
+ /* Version number of package */
62
+ #define VERSION "0.6.2"
63
+
64
+ /* Define to empty if `const' does not conform to ANSI C. */
65
+ /* #undef const */
@@ -0,0 +1,64 @@
1
+ /* ext/config.h.in. Generated from configure.ac by autoheader. */
2
+
3
+ /* Define to 1 if you have the <inttypes.h> header file. */
4
+ #undef HAVE_INTTYPES_H
5
+
6
+ /* Define to 1 if you have the <memory.h> header file. */
7
+ #undef HAVE_MEMORY_H
8
+
9
+ /* Define to 1 if stdbool.h conforms to C99. */
10
+ #undef HAVE_STDBOOL_H
11
+
12
+ /* Define to 1 if you have the <stdint.h> header file. */
13
+ #undef HAVE_STDINT_H
14
+
15
+ /* Define to 1 if you have the <stdlib.h> header file. */
16
+ #undef HAVE_STDLIB_H
17
+
18
+ /* Define to 1 if you have the <strings.h> header file. */
19
+ #undef HAVE_STRINGS_H
20
+
21
+ /* Define to 1 if you have the <string.h> header file. */
22
+ #undef HAVE_STRING_H
23
+
24
+ /* Define to 1 if you have the <sys/stat.h> header file. */
25
+ #undef HAVE_SYS_STAT_H
26
+
27
+ /* Define to 1 if you have the <sys/time.h> header file. */
28
+ #undef HAVE_SYS_TIME_H
29
+
30
+ /* Define to 1 if you have the <sys/types.h> header file. */
31
+ #undef HAVE_SYS_TYPES_H
32
+
33
+ /* Define to 1 if you have the <unistd.h> header file. */
34
+ #undef HAVE_UNISTD_H
35
+
36
+ /* Define to 1 if the system has the type `_Bool'. */
37
+ #undef HAVE__BOOL
38
+
39
+ /* Name of package */
40
+ #undef PACKAGE
41
+
42
+ /* Define to the address where bug reports for this package should be sent. */
43
+ #undef PACKAGE_BUGREPORT
44
+
45
+ /* Define to the full name of this package. */
46
+ #undef PACKAGE_NAME
47
+
48
+ /* Define to the full name and version of this package. */
49
+ #undef PACKAGE_STRING
50
+
51
+ /* Define to the one symbol short name of this package. */
52
+ #undef PACKAGE_TARNAME
53
+
54
+ /* Define to the version of this package. */
55
+ #undef PACKAGE_VERSION
56
+
57
+ /* Define to 1 if you have the ANSI C header files. */
58
+ #undef STDC_HEADERS
59
+
60
+ /* Version number of package */
61
+ #undef VERSION
62
+
63
+ /* Define to empty if `const' does not conform to ANSI C. */
64
+ #undef const
@@ -0,0 +1,3 @@
1
+ require 'mkmf'
2
+ #have_library('bz2')
3
+ create_makefile('CompLearn')
@@ -0,0 +1,241 @@
1
+ #
2
+ # CLConfig
3
+ #
4
+ # Reads a given configuration file of the following form:
5
+ #
6
+ # Compressor: [BZ2,..]
7
+ # InputDir: [input directory]
8
+ # OutputDir: [output directory]
9
+ # WorkDir: [working directory]
10
+ # Symmetric: [yes/no]
11
+ # Hosts: [list of hosts, separated by commas]
12
+ #
13
+ # Comments in the configuration file are prefixed with #
14
+ # individual values are accessed by configObj.[value]
15
+ # Note: this will exit if an error is encountered
16
+ #
17
+ # $Id: CLConfig.rb,v 1.5 2003/11/27 16:13:46 cilibrar Exp $
18
+ #
19
+
20
+ require 'yaml'
21
+ require 'CompLearnLib/FoundComp.rb'
22
+
23
+ class CLConfig
24
+
25
+ # The allowed config variables
26
+
27
+ @@BASECONFIGVARS =
28
+ [ # prettyName isArray Type Default Value
29
+ [ 'Compressor' , false, String, FoundComp.defaultCompressor() ],
30
+ [ 'CompressorCommand' , false, String, 'gzip -c -' ],
31
+ [ 'InputDir' , false, String, 'in' ],
32
+ [ 'OutputDir' , false, String, 'out' ],
33
+ [ 'WorkDir' , false, String, 'work' ],
34
+ [ 'Symmetric' , false, TrueClass, true ],
35
+ [ 'Hosts' , true, String, ['localhost'] ],
36
+ [ 'SingleProcess' , false, TrueClass, true ],
37
+ # maketree
38
+ [ 'UnpairedPenalty',false, Float, 0.0 ],
39
+ [ 'UseBestThirdOnly', false, TrueClass, false ],
40
+ [ 'MaxFailedTries', false, Integer, 100 ],
41
+ [ 'TreesPerTry', false, Integer, 1000 ],
42
+ [ 'InternalNodePrefix', false, String, 'n' ],
43
+ ]
44
+
45
+ # A class-method (since self is in class/module-scope, it refers to
46
+ # the Class object CLConfig). This one just tries to find a configuration
47
+ # file in a default spot, either in $HOME or /etc
48
+ # Also, this method caches the first read config object as a Singleton.
49
+
50
# Whether setSingleUser has been called.  Always returns true or false
# (never the description string that a bare defined? yields).
def self.singleUser?()
  defined?(@@SINGLEUSER) ? true : false
end

# Mark the process as single-user; the hosts accessor then reports only
# the first configured host.
def self.setSingleUser()
  @@SINGLEUSER = true
end

# Install cfg as the object that getDefaultConfig returns.
def self.setDefaultConfig(cfg)
  @@DEFCON = cfg
end
59
# Return the cached default configuration, creating it on first use.
# The first existing file among $HOME/.complearnrc and /etc/complearnrc
# is loaded; when neither exists a CLConfig holding only defaults is used.
# $HOME falls back to '/home' when the environment variable is unset.
def self.getDefaultConfig
  return @@DEFCON if defined?(@@DEFCON)

  homedir = ENV['HOME'] || '/home'
  filename = 'complearnrc'
  candidates = [ "#{homedir}/.#{filename}", "/etc/#{filename}" ]
  chosen = candidates.find { |pathname| File.exist?(pathname) }
  @@DEFCON = chosen ? CLConfig.new(chosen) : CLConfig.new(nil)
  @@DEFCON
end
74
# Build the lookup table used by the reader/writer methods and generate one
# reader per configuration variable without evaluating source strings.
#
# For an entry named e.g. 'InputDir' this defines #inputDir; entries of type
# TrueClass additionally get a predicate such as #isSymmetric?.
# @@CONFIGVARS maps the lower-cased name to
# [ isArray, typ, defval, prettyName, methodName ].
@@CONFIGVARS = { }
@@BASECONFIGVARS.each { | prettyName, isArray, typ, defval |
  lowerName = prettyName.downcase
  methodName = prettyName[0..0].downcase + prettyName[1..-1]
  if methodName == 'hosts'
    # Only the first host is reported when '-s' is on the command line or
    # single-user mode is active.
    def hosts()
      (ARGV.include?('-s') || CLConfig.singleUser?) ? [@hosts[0]] : @hosts
    end
  else
    attr_reader methodName
  end
  if typ == TrueClass
    define_method("is#{prettyName}?") { send(methodName) }
  end
  @@CONFIGVARS[lowerName] = [ isArray, typ, defval, prettyName, methodName ]
}
89
#
# Constructor: set every configuration variable to its default, then
# overlay the values from filename when one is given (nil skips reading).
#
def initialize(filename)
  @@CONFIGVARS.each_value { |spec|
    defVal, methodName = spec[2], spec[4]
    # Copy mutable defaults so that changing one instance's value in place
    # cannot alter the shared defaults table or other instances.
    defVal = defVal.dup if defVal.is_a?(Array) || defVal.is_a?(String)
    instance_variable_set("@#{methodName}", defVal)
  }
  if filename
    @filename = filename
    readCLConfig()
  end
end
102
+
103
# Write a YAML file at fname that maps every configuration variable's
# pretty name to its default value.
def self.writeDefaultConfigFile(fname)
  defaults = { }
  @@CONFIGVARS.each_value { |spec|
    defaults[spec[3]] = spec[2]   # prettyName => default value
  }
  File.open(fname, "w") { |out| out.write(defaults.to_yaml) }
end
113
+
114
#
# Read the configuration file named by @filename and store each recognised
# option in its instance variable.
#
# The file must be a YAML map.  Option names are matched case-insensitively
# against @@CONFIGVARS; an unknown name raises.  If the file cannot be read
# or parsed, a message is printed and the process exits with status 1.
#
# Values are loaded with YAML.load and handed to parseValue as strings, so
# this no longer depends on the parse-tree node API of the old YAML engine.
# NOTE(review): YAML.load is used for portability across Ruby versions; the
# file is expected to be a trusted, user-owned config.
#
def readCLConfig()
  begin
    # File.read closes the handle; the previous File::new never did.
    settings = YAML.load(File.read(@filename))
  rescue => e
    # Interpolate: String + Exception raises TypeError on current Rubies.
    print "Unable to read config file #{@filename}: #{e}"
    print "\n"
    exit(1)
  end

  fail "#{@filename} must be a map not a #{settings.class}" unless settings.is_a?(Hash)

  settings.each { |itagname, rawValue|
    tagname = itagname.to_s.downcase
    fail "Unknown configuration option: #{itagname}" unless @@CONFIGVARS.has_key?(tagname)
    isArray, typ, _default, _pretty, methodName = @@CONFIGVARS[tagname]
    if isArray
      result = Array(rawValue).map { |item| parseValue(typ, item.to_s) }
    else
      result = parseValue(typ, rawValue.to_s)
    end
    instance_variable_set("@#{methodName}", result)
  }
end
143
+
144
# Convert the textual config value val to the Ruby type named by typ.
#
# typ is one of TrueClass, String, Float or Integer; any other type prints
# a message and exits with status 1.  Leading and trailing whitespace is
# removed first.  For TrueClass the result is strictly true or false, and
# only the whole words true / yes / on (any case) count as true -- a value
# such as "none" is no longer mistaken for "on".
def parseValue(typ, val)
  # remove whitespace (non-destructively, so the caller's string is untouched)
  val = val.to_s.gsub(/^\s+/, '').gsub(/\s+$/, '')
  case typ.to_s # to_s necessary because === checks is_a? for Class
  when 'TrueClass'
    !!(val =~ /\A(?:true|yes|on)\z/i)
  when 'String'
    val
  when 'Float'
    val.to_f
  when 'Integer'
    val.to_i
  else
    puts "Illegal type: #{typ} for value #{val}"
    exit(1)
  end
end
170
+
171
# Locate the file or directory fname and return its absolute pathname.
# fname is resolved first against this object's inputDir and then against
# the current working directory; the first candidate that exists wins.
# Raises when fname is nil or when neither candidate exists.
def findInputFile(fname)
  raise "fname can not be nil" if fname.nil?

  [ self.inputDir, nil ].each { |base|
    candidate = base ? File.expand_path(fname, base) : File.expand_path(fname)
    return candidate if File.exist?(candidate)
  }
  raise "Cannot find file to open: #{fname}"
end
187
+
188
# Read a tagged file list and return [ training, features, testing ].
#
# Each useful line has the form "<number> <tag> <filename>".  Blank lines,
# lines starting with '#' and lines not matching that form are skipped.
# The filename is resolved with findInputFile.  By tag (case-insensitive):
#   p     -> testing  gets [ number, path ]
#   f     -> features gets path
#   other -> training gets [ number, path ]
# The list file is closed when reading finishes or an error is raised.
def readTaggedFileList(fname)
  training = [ ]
  features = [ ]
  testing = [ ]
  File.foreach(fname) do |line|
    line.chomp!
    next if line =~ /^\s*#/
    next unless line =~ /[\S]/
    next unless line =~ /^\s*(\S+)\s+(\S)\s+([^\s#]+)/
    # Use a separate local: the parameter fname is no longer overwritten.
    tagnum, tagtype, listed = $1.to_i, $2.downcase, $3
    realfname = findInputFile(listed)
    case tagtype
    when 'p'
      testing << [ tagnum, realfname ]
    when 'f'
      features << realfname
    else # tagtype == 'g'
      training << [ tagnum, realfname ]
    end
  end
  [ training, features, testing ]
end
211
# Return a list of filenames for objname.
# When objname is a directory, the result is the sorted list of
# "objname/entry" paths whose file type is 'file' (sub-directories and
# other entry types are left out).  Otherwise objname is treated as a list
# file and passed to readFileList.
def getFilelistFromDirOrFile(objname)
  return readFileList(objname) unless File.ftype(objname) == 'directory'

  # Dir.entries leaves no open directory handle behind.
  Dir.entries(objname).
    map { |entry| "#{objname}/#{entry}" }.
    select { |path| File.ftype(path) == 'file' }.
    sort
end
225
# Read a list file containing one filename per line and return the array
# of pathnames produced by findInputFile, in file order.  Lines starting
# with '#' and lines with no non-whitespace character are skipped.
# The list file is closed when reading finishes or an error is raised.
def readFileList(fname)
  result = [ ]
  File.foreach(fname) do |line|
    line.chomp!
    next if line =~ /^#/
    next unless line =~ /[\S]/
    result << findInputFile(line)
  end
  result
end
237
# Print the CompLearn version line to standard output and terminate the
# process with exit status 0.
def self.printVersionAndExit()
  banner = "CompLearn #{FoundComp::VERSION}"
  puts banner
  exit(0)
end
241
+ end