complearn 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. data/AUTHORS +13 -0
  2. data/COPYING +340 -0
  3. data/ChangeLog +0 -0
  4. data/INSTALL +231 -0
  5. data/Makefile +352 -0
  6. data/Makefile.am +76 -0
  7. data/Makefile.in +352 -0
  8. data/NEWS +7 -0
  9. data/README +0 -0
  10. data/aclocal.m4 +104 -0
  11. data/bin/Makefile +209 -0
  12. data/bin/Makefile.am +8 -0
  13. data/bin/Makefile.in +209 -0
  14. data/bin/labeltree +68 -0
  15. data/bin/labeltree.in +68 -0
  16. data/bin/makesvm +70 -0
  17. data/bin/makesvm.in +70 -0
  18. data/bin/maketree +98 -0
  19. data/bin/maketree.in +98 -0
  20. data/bin/ncd +43 -0
  21. data/bin/ncd.in +43 -0
  22. data/bin/ncdmatrix +54 -0
  23. data/bin/ncdmatrix.in +54 -0
  24. data/bin/ncdvector +50 -0
  25. data/bin/ncdvector.in +50 -0
  26. data/complearn-0.6.2.gem +0 -0
  27. data/complearn.gemspec +57 -0
  28. data/config.log +597 -0
  29. data/config.status +1082 -0
  30. data/configure +4922 -0
  31. data/configure.ac +91 -0
  32. data/confstat5FpLBf/config.h +65 -0
  33. data/confstat5FpLBf/subs-1.sed +50 -0
  34. data/confstat5FpLBf/subs-2.sed +13 -0
  35. data/confstat5FpLBf/subs.frag +0 -0
  36. data/confstat5FpLBf/subs.sed +59 -0
  37. data/confstat5FpLBf/undefs.sed +24 -0
  38. data/doc/FAQ.txt +67 -0
  39. data/doc/Makefile +286 -0
  40. data/doc/Makefile.am +11 -0
  41. data/doc/Makefile.in +286 -0
  42. data/doc/devguide.txt +15 -0
  43. data/doc/example.complearnrc +14 -0
  44. data/doc/examples.txt +35 -0
  45. data/doc/man/Makefile +255 -0
  46. data/doc/man/Makefile.am +11 -0
  47. data/doc/man/Makefile.in +255 -0
  48. data/doc/man/complearn.5 +91 -0
  49. data/doc/man/labeltree.1 +35 -0
  50. data/doc/man/makesvm.1 +60 -0
  51. data/doc/man/maketree.1 +58 -0
  52. data/doc/man/ncd.1 +51 -0
  53. data/doc/man/ncdmatrix.1 +40 -0
  54. data/doc/man/ncdvector.1 +42 -0
  55. data/doc/readme.txt +101 -0
  56. data/doc/userguide.txt +46 -0
  57. data/examples/genes/blueWhale.txt +1 -0
  58. data/examples/genes/cat.txt +1 -0
  59. data/examples/genes/chimpanzee.txt +1 -0
  60. data/examples/genes/finWhale.txt +1 -0
  61. data/examples/genes/graySeal.txt +1 -0
  62. data/examples/genes/harborSeal.txt +1 -0
  63. data/examples/genes/horse.txt +1 -0
  64. data/examples/genes/human.txt +1 -0
  65. data/examples/genes/mouse.txt +1 -0
  66. data/examples/genes/rat.txt +1 -0
  67. data/ext/Makefile +167 -0
  68. data/ext/Quartet.c +399 -0
  69. data/ext/Quartet.h +62 -0
  70. data/ext/TreeScore.c +244 -0
  71. data/ext/TreeScore.h +3 -0
  72. data/ext/config.h +65 -0
  73. data/ext/config.h.in +64 -0
  74. data/ext/extconf.rb +3 -0
  75. data/ext/lib/CompLearnLib/CLConfig.rb +241 -0
  76. data/ext/lib/CompLearnLib/CompressionObject.rb +59 -0
  77. data/ext/lib/CompLearnLib/CompressionTask.rb +99 -0
  78. data/ext/lib/CompLearnLib/DistMatrix.rb +18 -0
  79. data/ext/lib/CompLearnLib/FoundComp.rb +10 -0
  80. data/ext/lib/CompLearnLib/FoundComp.rb.in +10 -0
  81. data/ext/lib/CompLearnLib/Ncd.rb +248 -0
  82. data/ext/lib/CompLearnLib/RunEnv.rb +150 -0
  83. data/ext/lib/CompLearnLib/Task.rb +39 -0
  84. data/ext/lib/CompLearnLib/TaskMaster.rb +13 -0
  85. data/ext/lib/CompLearnLib/TaskMasterMPI.rb +112 -0
  86. data/ext/lib/CompLearnLib/TaskMasterSingle.rb +39 -0
  87. data/ext/lib/CompLearnLib/Tree.rb +300 -0
  88. data/install-sh +294 -0
  89. data/missing +336 -0
  90. data/mkinstalldirs +111 -0
  91. data/o +24 -0
  92. data/scripts/CompLearn.iss +89 -0
  93. data/scripts/CompLearn.iss.in +89 -0
  94. data/scripts/debian/changelog +6 -0
  95. data/scripts/debian/control +14 -0
  96. data/scripts/makeSetup.sh +23 -0
  97. data/scripts/makeSetup.sh.in +23 -0
  98. data/scripts/makedeb.zsh +46 -0
  99. data/scripts/makedeb.zsh.in +46 -0
  100. data/tests/alltests.rb +2 -0
  101. data/tests/bz2test.rb +516 -0
  102. data/tests/sshagent-test.rb +48 -0
  103. data/tests/tests.rb +275 -0
  104. metadata +164 -0
@@ -0,0 +1,244 @@
1
#include <assert.h>
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>

#include "TreeScore.h"
#include "Quartet.h"
#include "ruby.h"
6
+
7
+ #ifndef VALUEFUNC
8
+ #define VALUEFUNC(x) RUBY_METHOD_FUNC(x)
9
+ #endif
10
+
11
+ static VALUE cTreeScore;
12
+
13
+ /*
14
+ * Holds a list of quartets with associated weights.
15
+ * Keeps track of worst and best possible scores using this list.
16
+ */
17
+ struct TreeScore {
18
+ weight_t worst, best;
19
+ weight_t penalty;
20
+ struct QuartetList *ql;
21
+ };
22
+
23
+ /*
24
+ * Allocate a new TreeScore
25
+ */
26
+ struct TreeScore *newTreeScore()
27
+ {
28
+ // m11.
29
+ struct TreeScore *result = calloc(sizeof(struct TreeScore), 1);
30
+ result->penalty = 0.0;
31
+ return result;
32
+ }
33
/*
 * Free callback handed to Data_Wrap_Struct: releases the C TreeScore
 * object stored inside a Ruby TreeScore instance.
 */
static void ts_free(void *vts)
{
    struct TreeScore *wrapped = (struct TreeScore *) vts;

    freeTreeScore(wrapped);
}
40
+
41
+ void freeTreeScore(struct TreeScore *ts)
42
+ {
43
+ // f11.
44
+ freeQuartetList(ts->ql);
45
+ ts->ql = 0;
46
+ free(ts);
47
+ }
48
+
49
+
50
+ /*
51
+ * Convert a Ruby Tree into a C/C++ FastTree pointer
52
+ */
53
+ struct FastTree *convertTreeFromRuby(VALUE tree)
54
+ {
55
+ VALUE edges = rb_iv_get(tree, "@edges");
56
+ int translation[MAXNODES];
57
+ int nodeCount = RARRAY(edges)->len;
58
+ int speciesCount = (nodeCount + 2) / 2;
59
+ struct FastTree *ft = newFastTree(nodeCount);
60
+ int currentKernel=0, currentSpecies=0;
61
+ int i, j;
62
+ ft->spec = speciesCount;
63
+ for (i = 0; i < nodeCount; ++i) {
64
+ VALUE neighbors = rb_ary_entry(edges, i);
65
+ int ns = RARRAY(neighbors)->len;
66
+ switch (ns) {
67
+ case 1:
68
+ translation[i] = currentSpecies++;
69
+ break;
70
+ case 3:
71
+ translation[i] = speciesCount + currentKernel++;
72
+ break;
73
+ default:
74
+ assert("Bad tree!" && 0);
75
+ }
76
+ }
77
+ for (i = 0; i < nodeCount; ++i) {
78
+ VALUE neighbors = rb_ary_entry(edges, i);
79
+ int ns = RARRAY(neighbors)->len;
80
+ for (j = 0; j < ns; ++j) {
81
+ VALUE curn = rb_ary_entry(neighbors, j);
82
+ push_back(&ft->tree->nl[translation[i]], translation[NUM2INT(curn)]);
83
+ }
84
+ }
85
+ ft->spm = allShortestPathTrees(ft->tree);
86
+ return ft;
87
+ }
88
+
89
+ /*
90
+ * Convert a Ruby 2-dimensional array into a C/C++ DistMatrix
91
+ */
92
+ struct DistMatrix *convertDistMatrixFromRuby(VALUE dm)
93
+ {
94
+ int size = RARRAY(dm)->len;
95
+ struct DistMatrix *rdm = newDistMatrix(size);
96
+ int i, j;
97
+ for (i = 0; i < size; ++i) {
98
+ for (j = 0; j < size; ++j) {
99
+ weight_t val = NUM2DBL(rb_ary_entry(rb_ary_entry(dm, i),j));
100
+ rdm->vals[i][j] = val;
101
+ }
102
+ }
103
+ return rdm;
104
+ }
105
+
106
+ /*
107
+ * Makes a list consisting of only the best quartets (1/3 as many as full)
108
+ */
109
+ static VALUE ts_makeBest(VALUE cl, VALUE dm)
110
+ {
111
+ struct TreeScore *ts = newTreeScore();
112
+ VALUE tdata = Data_Wrap_Struct(cl, 0, ts_free, ts);
113
+ struct DistMatrix *gdm = convertDistMatrixFromRuby(dm);
114
+ ts->ql = makeBestQuartetList(gdm, &ts->worst, &ts->best);
115
+ freeDistMatrix(gdm);
116
+ return tdata;
117
+ }
118
+
119
+ /*
120
+ * Makes a list of every quartet with corresponding cost.
121
+ */
122
+ static VALUE ts_makeFull(VALUE cl, VALUE dm)
123
+ {
124
+ struct TreeScore *ts = newTreeScore();
125
+ VALUE tdata = Data_Wrap_Struct(cl, 0, ts_free, ts);
126
+ struct DistMatrix *gdm = convertDistMatrixFromRuby(dm);
127
+ ts->ql = makeFullQuartetList(gdm, &ts->worst, &ts->best);
128
+ freeDistMatrix(gdm);
129
+ return tdata;
130
+ }
131
+
132
+ /*
133
+ * Return the worst possible tree cost total. This is used to calculate S(T).
134
+ */
135
+ static VALUE ts_worst(VALUE self)
136
+ {
137
+ struct TreeScore *ts;
138
+ Data_Get_Struct(self, struct TreeScore, ts);
139
+ return rb_float_new(ts->worst);
140
+ }
141
+
142
+ /*
143
+ * Return the best possible tree cost total. This is used to calculate S(T).
144
+ */
145
+ static VALUE ts_best(VALUE self)
146
+ {
147
+ struct TreeScore *ts;
148
+ Data_Get_Struct(self, struct TreeScore, ts);
149
+ return rb_float_new(ts->best);
150
+ }
151
+
152
+ /*
153
+ * Returns the number of neighbors who are leaves to a given node
154
+ */
155
+ int countLeafNeighbors(const struct FastTree *ft, qbase_t which)
156
+ {
157
+ int nc = ft->tree->nl[which].size;
158
+ int i;
159
+ int leafCount = 0;
160
+ for (i = 0; i < nc; ++i)
161
+ if (ft->tree->nl[ft->tree->nl[which].n[i]].size == 1)
162
+ leafCount++;
163
+ return leafCount;
164
+ }
165
+
166
+ /*
167
+ * Returns the number of nodes that are unpaired
168
+ */
169
+ int countBadNodes(const struct FastTree *ft)
170
+ {
171
+ int i;
172
+ int badCount = 0;
173
+ for (i = ft->spec; i < ft->tree->size; ++i)
174
+ if (countLeafNeighbors(ft, i) == 1)
175
+ badCount++;
176
+ return badCount;
177
+ }
178
+
179
+ static VALUE ts_penaltyeq(VALUE self, VALUE val)
180
+ {
181
+ struct TreeScore *ts;
182
+ Data_Get_Struct(self, struct TreeScore, ts);
183
+ ts->penalty = NUM2DBL(val);
184
+ return rb_float_new(ts->penalty);
185
+ }
186
+
187
+ static VALUE ts_penalty(VALUE self)
188
+ {
189
+ struct TreeScore *ts;
190
+ Data_Get_Struct(self, struct TreeScore, ts);
191
+ return rb_float_new(ts->penalty);
192
+ }
193
+
194
+ /*
195
+ * Return a tree's S(T) score, between 0.0 (worst) and 1.0 (best), (penalty may force it outside range)
196
+ */
197
+ void printFastTree(struct FastTree *ft)
198
+ {
199
+ int i, j;
200
+ printf("Tree size: %d\n", ft->tree->size);
201
+ for (i = 0; i < ft->tree->size; ++i) {
202
+ const struct NodeList *cur = &ft->tree->nl[i];
203
+ printf("Node %d: %d neighbors, %d leaf-neighbors: ", i, ft->tree->nl[i].size, countLeafNeighbors(ft, i));
204
+ for (j = 0; j < cur->size; ++j) {
205
+ printf("%d->%d, ", i, cur->n[j]);
206
+ }
207
+ printf("\n");
208
+ }
209
+ printf("\n");
210
+ }
211
+
212
+ static VALUE ts_score(VALUE self, VALUE tree)
213
+ {
214
+ int badNodeCount;
215
+ struct TreeScore *ts;
216
+ struct FastTree *ft = convertTreeFromRuby(tree);
217
+ weight_t score;
218
+ float result, radj;
219
+ Data_Get_Struct(self, struct TreeScore, ts);
220
+ score = calculateWeightedScore(ft, ts->ql);
221
+ result = (score - ts->worst) / (ts->best - ts->worst);
222
+ badNodeCount = countBadNodes(ft);
223
+ radj = - ts->penalty * countBadNodes(ft);
224
+ freeFastTree(ft);
225
+ return rb_float_new(result + radj);
226
+ }
227
+
228
+ void initTreeScore()
229
+ {
230
+ cTreeScore = rb_define_class("TreeScore", rb_cObject);
231
+ rb_define_singleton_method(cTreeScore, "makeBestList", VALUEFUNC(ts_makeBest), 1);
232
+ rb_define_singleton_method(cTreeScore, "makeFullList", VALUEFUNC(ts_makeFull), 1);
233
+ rb_define_method(cTreeScore, "penalty", VALUEFUNC(ts_penalty), 0);
234
+ rb_define_method(cTreeScore, "penalty=", VALUEFUNC(ts_penaltyeq), 1);
235
+ rb_define_method(cTreeScore, "score", VALUEFUNC(ts_score), 1);
236
+ rb_define_method(cTreeScore, "worst", VALUEFUNC(ts_worst), 0);
237
+ rb_define_method(cTreeScore, "best", VALUEFUNC(ts_best), 0);
238
+ }
239
+
240
/*
 * Extension entry point for the "CompLearn" library (the name given to
 * create_makefile): registers the TreeScore class.
 */
void Init_CompLearn()
{
    initTreeScore();
}
244
+
@@ -0,0 +1,3 @@
1
#ifndef TREESCORE_H
#define TREESCORE_H

/* Opaque scoring state; the layout is private to TreeScore.c. */
struct TreeScore;

/* Define the Ruby TreeScore class and its methods. */
void initTreeScore();

/* Release a TreeScore and the quartet list it holds. */
void freeTreeScore(struct TreeScore *ts);

#endif /* TREESCORE_H */
@@ -0,0 +1,65 @@
1
+ /* ext/config.h. Generated by configure. */
2
+ /* ext/config.h.in. Generated from configure.ac by autoheader. */
3
+
4
+ /* Define to 1 if you have the <inttypes.h> header file. */
5
+ #define HAVE_INTTYPES_H 1
6
+
7
+ /* Define to 1 if you have the <memory.h> header file. */
8
+ #define HAVE_MEMORY_H 1
9
+
10
+ /* Define to 1 if stdbool.h conforms to C99. */
11
+ /* #undef HAVE_STDBOOL_H */
12
+
13
+ /* Define to 1 if you have the <stdint.h> header file. */
14
+ #define HAVE_STDINT_H 1
15
+
16
+ /* Define to 1 if you have the <stdlib.h> header file. */
17
+ #define HAVE_STDLIB_H 1
18
+
19
+ /* Define to 1 if you have the <strings.h> header file. */
20
+ #define HAVE_STRINGS_H 1
21
+
22
+ /* Define to 1 if you have the <string.h> header file. */
23
+ #define HAVE_STRING_H 1
24
+
25
+ /* Define to 1 if you have the <sys/stat.h> header file. */
26
+ #define HAVE_SYS_STAT_H 1
27
+
28
+ /* Define to 1 if you have the <sys/time.h> header file. */
29
+ #define HAVE_SYS_TIME_H 1
30
+
31
+ /* Define to 1 if you have the <sys/types.h> header file. */
32
+ #define HAVE_SYS_TYPES_H 1
33
+
34
+ /* Define to 1 if you have the <unistd.h> header file. */
35
+ #define HAVE_UNISTD_H 1
36
+
37
+ /* Define to 1 if the system has the type `_Bool'. */
38
+ /* #undef HAVE__BOOL */
39
+
40
+ /* Name of package */
41
+ #define PACKAGE "complearn"
42
+
43
+ /* Define to the address where bug reports for this package should be sent. */
44
+ #define PACKAGE_BUGREPORT ""
45
+
46
+ /* Define to the full name of this package. */
47
+ #define PACKAGE_NAME ""
48
+
49
+ /* Define to the full name and version of this package. */
50
+ #define PACKAGE_STRING ""
51
+
52
+ /* Define to the one symbol short name of this package. */
53
+ #define PACKAGE_TARNAME ""
54
+
55
+ /* Define to the version of this package. */
56
+ #define PACKAGE_VERSION ""
57
+
58
+ /* Define to 1 if you have the ANSI C header files. */
59
+ #define STDC_HEADERS 1
60
+
61
+ /* Version number of package */
62
+ #define VERSION "0.6.2"
63
+
64
+ /* Define to empty if `const' does not conform to ANSI C. */
65
+ /* #undef const */
@@ -0,0 +1,64 @@
1
+ /* ext/config.h.in. Generated from configure.ac by autoheader. */
2
+
3
+ /* Define to 1 if you have the <inttypes.h> header file. */
4
+ #undef HAVE_INTTYPES_H
5
+
6
+ /* Define to 1 if you have the <memory.h> header file. */
7
+ #undef HAVE_MEMORY_H
8
+
9
+ /* Define to 1 if stdbool.h conforms to C99. */
10
+ #undef HAVE_STDBOOL_H
11
+
12
+ /* Define to 1 if you have the <stdint.h> header file. */
13
+ #undef HAVE_STDINT_H
14
+
15
+ /* Define to 1 if you have the <stdlib.h> header file. */
16
+ #undef HAVE_STDLIB_H
17
+
18
+ /* Define to 1 if you have the <strings.h> header file. */
19
+ #undef HAVE_STRINGS_H
20
+
21
+ /* Define to 1 if you have the <string.h> header file. */
22
+ #undef HAVE_STRING_H
23
+
24
+ /* Define to 1 if you have the <sys/stat.h> header file. */
25
+ #undef HAVE_SYS_STAT_H
26
+
27
+ /* Define to 1 if you have the <sys/time.h> header file. */
28
+ #undef HAVE_SYS_TIME_H
29
+
30
+ /* Define to 1 if you have the <sys/types.h> header file. */
31
+ #undef HAVE_SYS_TYPES_H
32
+
33
+ /* Define to 1 if you have the <unistd.h> header file. */
34
+ #undef HAVE_UNISTD_H
35
+
36
+ /* Define to 1 if the system has the type `_Bool'. */
37
+ #undef HAVE__BOOL
38
+
39
+ /* Name of package */
40
+ #undef PACKAGE
41
+
42
+ /* Define to the address where bug reports for this package should be sent. */
43
+ #undef PACKAGE_BUGREPORT
44
+
45
+ /* Define to the full name of this package. */
46
+ #undef PACKAGE_NAME
47
+
48
+ /* Define to the full name and version of this package. */
49
+ #undef PACKAGE_STRING
50
+
51
+ /* Define to the one symbol short name of this package. */
52
+ #undef PACKAGE_TARNAME
53
+
54
+ /* Define to the version of this package. */
55
+ #undef PACKAGE_VERSION
56
+
57
+ /* Define to 1 if you have the ANSI C header files. */
58
+ #undef STDC_HEADERS
59
+
60
+ /* Version number of package */
61
+ #undef VERSION
62
+
63
+ /* Define to empty if `const' does not conform to ANSI C. */
64
+ #undef const
@@ -0,0 +1,3 @@
1
+ require 'mkmf'
2
+ #have_library('bz2')
3
+ create_makefile('CompLearn')
@@ -0,0 +1,241 @@
1
+ #
2
+ # CLConfig
3
+ #
4
+ # Reads a given configuration file of the following form:
5
+ #
6
+ # Compressor: [BZ2,..]
7
+ # InputDir: [input directory]
8
+ # OutputDir: [output directory]
9
+ # WorkDir: [working directory]
10
+ # Symmetric: [yes/no]
11
+ # Hosts: [list of hosts, separated by commas]
12
+ #
13
+ # Comments in the configuration file are prefixed with #
14
+ # individual values are accessed by configObj.[value]
15
+ # Note: this will exit if an error is encountered
16
+ #
17
+ # $Id: CLConfig.rb,v 1.5 2003/11/27 16:13:46 cilibrar Exp $
18
+ #
19
+
20
+ require 'yaml'
21
+ require 'CompLearnLib/FoundComp.rb'
22
+
23
# Holds the CompLearn configuration: built-in defaults, optionally
# overridden by a YAML configuration file.  Every entry of
# @@BASECONFIGVARS becomes a reader method (e.g. inputDir, workDir), and
# boolean entries additionally get an is<PrettyName>? predicate.
class CLConfig

  # The allowed config variables

  @@BASECONFIGVARS =
    [ # prettyName            isArray  Type       Default Value
      [ 'Compressor',         false,   String,    FoundComp.defaultCompressor() ],
      [ 'CompressorCommand',  false,   String,    'gzip -c -' ],
      [ 'InputDir',           false,   String,    'in' ],
      [ 'OutputDir',          false,   String,    'out' ],
      [ 'WorkDir',            false,   String,    'work' ],
      [ 'Symmetric',          false,   TrueClass, true ],
      [ 'Hosts',              true,    String,    ['localhost'] ],
      [ 'SingleProcess',      false,   TrueClass, true ],
      # maketree
      [ 'UnpairedPenalty',    false,   Float,     0.0 ],
      [ 'UseBestThirdOnly',   false,   TrueClass, false ],
      [ 'MaxFailedTries',     false,   Integer,   100 ],
      [ 'TreesPerTry',        false,   Integer,   1000 ],
      [ 'InternalNodePrefix', false,   String,    'n' ],
    ]

  # Truthy once setSingleUser has been called, nil otherwise.
  def self.singleUser?()
    defined?(@@SINGLEUSER)
  end

  # Switch to single-user mode; hosts() then reports only the first host.
  def self.setSingleUser()
    @@SINGLEUSER = true
  end

  # Install cfg as the cached default configuration.
  def self.setDefaultConfig(cfg)
    @@DEFCON = cfg
  end

  # Return the cached default configuration, creating it on first use from
  # $HOME/.complearnrc or /etc/complearnrc (first one that exists), or
  # from the built-in defaults when neither file is present.
  def self.getDefaultConfig
    unless defined?(@@DEFCON)
      homedir = ENV['HOME'] || '/home'
      filename = 'complearnrc'
      candidates = [ "#{homedir}/.#{filename}", "/etc/#{filename}" ]
      found = candidates.find { |pathname| File.exist?(pathname) }
      # CLConfig.new(nil) yields a defaults-only configuration.
      @@DEFCON = CLConfig.new(found)
    end
    @@DEFCON
  end

  # lowercase option name => [ isArray, type, default, prettyName, methodName ]
  @@CONFIGVARS = { }
  @@BASECONFIGVARS.each { |prettyName, isArray, typ, defval|
    lowerName = prettyName.downcase
    methodName = prettyName[0..0].downcase + prettyName[1..-1]
    ivar = "@#{methodName}"
    # accessing methods ('hosts' has a hand-written reader below)
    unless methodName == 'hosts'
      define_method(methodName) { instance_variable_get(ivar) }
    end
    if typ == TrueClass
      define_method("is#{prettyName}?") { send(methodName) }
    end
    @@CONFIGVARS[lowerName] = [ isArray, typ, defval, prettyName, methodName ]
  }

  # The configured hosts; only the first one when '-s' was given on the
  # command line or single-user mode is set.
  def hosts()
    (ARGV.include?('-s') || CLConfig.singleUser?) ? [@hosts[0]] : @hosts
  end

  #
  # constructor, read file
  #
  # filename:: path of a YAML configuration file, or nil for defaults only.
  def initialize(filename)
    @@CONFIGVARS.each_value { |vars|
      defVal, methodName = vars[2], vars[4]
      # Copy mutable defaults so one instance cannot alter another's
      # (or the class-wide) default value.
      if defVal.is_a?(Array) || defVal.is_a?(String)
        defVal = defVal.dup
      end
      instance_variable_set("@#{methodName}", defVal)
    }
    if filename
      @filename = filename
      readCLConfig()
    end
  end

  # Write the default value of every option to fname as a YAML map.
  def self.writeDefaultConfigFile(fname)
    cfgmap = { }
    @@CONFIGVARS.each_value { |vars|
      defval, prettyName = vars[2], vars[3]
      cfgmap[prettyName] = defval
    }
    File.open(fname, "w") { |f|
      f.write(cfgmap.to_yaml)
    }
  end

  #
  # read in the configuration file
  #
  # Prints a message and exits with status 1 when the file cannot be read
  # or parsed.  Raises RuntimeError when the document is not a map or
  # names an unknown option.  An empty file leaves the defaults in place.
  def readCLConfig()
    begin
      # NOTE(review): on Ruby versions where YAML.load is not the safe
      # loader it can instantiate arbitrary objects; the configuration
      # file is assumed to be trusted.
      yamlcfg = YAML.load(File.read(@filename))
    rescue StandardError, SyntaxError => e
      print "Unable to read config file #{@filename}: #{e}"
      print "\n"
      exit(1)
    end

    return unless yamlcfg
    fail "#{@filename} must be a map" unless yamlcfg.is_a?(Hash)

    yamlcfg.each { |itagname, value|
      tagname = itagname.to_s.downcase
      unless @@CONFIGVARS.has_key?(tagname)
        fail "Unknown configuration option: #{itagname}"
      end
      isArray, typ, default, pretty, methodName = @@CONFIGVARS[tagname]
      if isArray
        items = value.is_a?(Array) ? value : [ value ]
        result = items.map { |i| parseValue(typ, i) }
      else
        result = parseValue(typ, value)
      end
      instance_variable_set("@#{methodName}", result)
    }
  end

  # Convert a raw configuration value to the given type (TrueClass,
  # String, Float or Integer).  val may be a String or an already-decoded
  # YAML scalar; it is converted to text and stripped of surrounding
  # whitespace first.  Booleans are true only for exactly "true", "yes"
  # or "on" (case-insensitive).  Any other type prints a message and
  # exits with status 1.
  def parseValue(typ, val)
    # remove whitespace
    val = val.to_s.strip
    case typ.to_s # to_s necessary because === checks is_a? for Class

    when 'TrueClass'
      return !!(val =~ /\A(true|yes|on)\z/i)

    when 'String'
      return val

    when 'Float'
      return val.to_f

    when 'Integer'
      return val.to_i

    else
      puts "Illegal type: #{typ} for value #{val}"
      exit(1)

    end

  end

  # Searches for a file or directory specified by fname
  # if fname is relative, it will search for it in the following
  # order:
  #   inputDir specified in this CLConfig object
  #   current working directory
  #
  # If found, a string is returned with the absolute (full) pathname.
  # If not, an exception is raised
  def findInputFile(fname)
    raise "fname can not be nil" if fname == nil
    maybe = File.expand_path(fname, self.inputDir)
    return maybe if File.exist?(maybe)
    maybe = File.expand_path(fname)
    return maybe if File.exist?(maybe)
    raise "Cannot find file to open: #{fname}"
  end

  # Read a tagged file list: each useful line is "<number> <tag> <file>".
  # Tag 'p' adds [number, path] to testing, tag 'f' adds path to features,
  # any other tag adds [number, path] to training.  Blank lines, comment
  # lines and lines that do not match are skipped.
  # Returns [ training, features, testing ].
  def readTaggedFileList(fname)
    training = [ ]
    features = [ ]
    testing = [ ]
    # File.foreach closes the file when done, also on an exception.
    File.foreach(fname) { |line|
      line.chomp!
      next if line =~ /^\s*#/
      next unless line =~ /[\S]/
      next unless line =~ /^\s*(\S+)\s+(\S)\s+([^\s#]+)/
      tagnum, tagtype, entry = $1.to_i, $2.downcase, $3
      realfname = findInputFile(entry)
      case tagtype
      when 'p'
        testing << [ tagnum, realfname ]
      when 'f'
        features << realfname
      else # tagtype == 'g'
        training << [ tagnum, realfname ]
      end
    }
    return [ training, features, testing ]
  end

  # If objname is a directory, return the sorted paths of the regular
  # files directly inside it; otherwise treat it as a file list and
  # return readFileList(objname).
  def getFilelistFromDirOrFile(objname)
    files = [ ]
    if File.ftype(objname) == 'directory'
      Dir.entries(objname).each { |f|
        goodf = "#{objname}/#{f}"
        files << goodf if File.ftype(goodf) == 'file'
      }
      files.sort!
    else
      files = readFileList(objname)
    end
    files
  end

  # Read a plain file list (one name per line, '#' in column one starts a
  # comment, blank lines ignored) and return the resolved full pathnames.
  def readFileList(fname)
    result = [ ]
    # File.foreach closes the file when done, also on an exception.
    File.foreach(fname) { |line|
      line.chomp!
      next if line =~ /^#/
      next unless line =~ /[\S]/
      result << findInputFile(line)
    }
    result
  end

  # Print the CompLearn version and terminate with status 0.
  def self.printVersionAndExit()
    puts "CompLearn #{FoundComp::VERSION}"
    exit(0)
  end
end