complearn 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104) hide show
  1. data/AUTHORS +13 -0
  2. data/COPYING +340 -0
  3. data/ChangeLog +0 -0
  4. data/INSTALL +231 -0
  5. data/Makefile +352 -0
  6. data/Makefile.am +76 -0
  7. data/Makefile.in +352 -0
  8. data/NEWS +7 -0
  9. data/README +0 -0
  10. data/aclocal.m4 +104 -0
  11. data/bin/Makefile +209 -0
  12. data/bin/Makefile.am +8 -0
  13. data/bin/Makefile.in +209 -0
  14. data/bin/labeltree +68 -0
  15. data/bin/labeltree.in +68 -0
  16. data/bin/makesvm +70 -0
  17. data/bin/makesvm.in +70 -0
  18. data/bin/maketree +98 -0
  19. data/bin/maketree.in +98 -0
  20. data/bin/ncd +43 -0
  21. data/bin/ncd.in +43 -0
  22. data/bin/ncdmatrix +54 -0
  23. data/bin/ncdmatrix.in +54 -0
  24. data/bin/ncdvector +50 -0
  25. data/bin/ncdvector.in +50 -0
  26. data/complearn-0.6.2.gem +0 -0
  27. data/complearn.gemspec +57 -0
  28. data/config.log +597 -0
  29. data/config.status +1082 -0
  30. data/configure +4922 -0
  31. data/configure.ac +91 -0
  32. data/confstat5FpLBf/config.h +65 -0
  33. data/confstat5FpLBf/subs-1.sed +50 -0
  34. data/confstat5FpLBf/subs-2.sed +13 -0
  35. data/confstat5FpLBf/subs.frag +0 -0
  36. data/confstat5FpLBf/subs.sed +59 -0
  37. data/confstat5FpLBf/undefs.sed +24 -0
  38. data/doc/FAQ.txt +67 -0
  39. data/doc/Makefile +286 -0
  40. data/doc/Makefile.am +11 -0
  41. data/doc/Makefile.in +286 -0
  42. data/doc/devguide.txt +15 -0
  43. data/doc/example.complearnrc +14 -0
  44. data/doc/examples.txt +35 -0
  45. data/doc/man/Makefile +255 -0
  46. data/doc/man/Makefile.am +11 -0
  47. data/doc/man/Makefile.in +255 -0
  48. data/doc/man/complearn.5 +91 -0
  49. data/doc/man/labeltree.1 +35 -0
  50. data/doc/man/makesvm.1 +60 -0
  51. data/doc/man/maketree.1 +58 -0
  52. data/doc/man/ncd.1 +51 -0
  53. data/doc/man/ncdmatrix.1 +40 -0
  54. data/doc/man/ncdvector.1 +42 -0
  55. data/doc/readme.txt +101 -0
  56. data/doc/userguide.txt +46 -0
  57. data/examples/genes/blueWhale.txt +1 -0
  58. data/examples/genes/cat.txt +1 -0
  59. data/examples/genes/chimpanzee.txt +1 -0
  60. data/examples/genes/finWhale.txt +1 -0
  61. data/examples/genes/graySeal.txt +1 -0
  62. data/examples/genes/harborSeal.txt +1 -0
  63. data/examples/genes/horse.txt +1 -0
  64. data/examples/genes/human.txt +1 -0
  65. data/examples/genes/mouse.txt +1 -0
  66. data/examples/genes/rat.txt +1 -0
  67. data/ext/Makefile +167 -0
  68. data/ext/Quartet.c +399 -0
  69. data/ext/Quartet.h +62 -0
  70. data/ext/TreeScore.c +244 -0
  71. data/ext/TreeScore.h +3 -0
  72. data/ext/config.h +65 -0
  73. data/ext/config.h.in +64 -0
  74. data/ext/extconf.rb +3 -0
  75. data/ext/lib/CompLearnLib/CLConfig.rb +241 -0
  76. data/ext/lib/CompLearnLib/CompressionObject.rb +59 -0
  77. data/ext/lib/CompLearnLib/CompressionTask.rb +99 -0
  78. data/ext/lib/CompLearnLib/DistMatrix.rb +18 -0
  79. data/ext/lib/CompLearnLib/FoundComp.rb +10 -0
  80. data/ext/lib/CompLearnLib/FoundComp.rb.in +10 -0
  81. data/ext/lib/CompLearnLib/Ncd.rb +248 -0
  82. data/ext/lib/CompLearnLib/RunEnv.rb +150 -0
  83. data/ext/lib/CompLearnLib/Task.rb +39 -0
  84. data/ext/lib/CompLearnLib/TaskMaster.rb +13 -0
  85. data/ext/lib/CompLearnLib/TaskMasterMPI.rb +112 -0
  86. data/ext/lib/CompLearnLib/TaskMasterSingle.rb +39 -0
  87. data/ext/lib/CompLearnLib/Tree.rb +300 -0
  88. data/install-sh +294 -0
  89. data/missing +336 -0
  90. data/mkinstalldirs +111 -0
  91. data/o +24 -0
  92. data/scripts/CompLearn.iss +89 -0
  93. data/scripts/CompLearn.iss.in +89 -0
  94. data/scripts/debian/changelog +6 -0
  95. data/scripts/debian/control +14 -0
  96. data/scripts/makeSetup.sh +23 -0
  97. data/scripts/makeSetup.sh.in +23 -0
  98. data/scripts/makedeb.zsh +46 -0
  99. data/scripts/makedeb.zsh.in +46 -0
  100. data/tests/alltests.rb +2 -0
  101. data/tests/bz2test.rb +516 -0
  102. data/tests/sshagent-test.rb +48 -0
  103. data/tests/tests.rb +275 -0
  104. metadata +164 -0
@@ -0,0 +1,59 @@
1
+ #
2
+ # CompressionObject
3
+ #
4
+ # This is a compressible object. Abstracts string/files
5
+ #
6
+ # $Id: CompressionObject.rb,v 1.1 2003/10/23 17:03:59 cilibrar Exp $
7
+ #
8
+
9
class CompressionObject

  private_class_method :new

  #
  # private constructor; use fromFile or fromString instead
  #
  # str:    the literal data (only used when isFile is false)
  # id:     identifier returned by to_s; for file objects this is
  #         also the path that value() reads
  # isFile: true when the data lives in the file named by id
  #
  def initialize(str, id, isFile)
    @cId = id
    @str = str
    @isFile = isFile
  end

  #
  # Build an object whose content is read from the file fname each
  # time value() is called. The file name doubles as the id.
  #
  def self.fromFile(fname)
    new('', fname, true)
  end

  #
  # Build an object from an in-memory string.
  #
  # id defaults to the string itself, so one-argument calls keep
  # working and behave like fromString(str, str).
  #
  def self.fromString(str, id = str)
    new(str, id, false)
  end

  #
  # the id is the string representation
  #
  def to_s
    @cId
  end

  #
  # Return the data to compress: the stored string, or the current
  # content of the file.
  #
  # Raises RuntimeError when the file does not exist or is not
  # readable.
  #
  def value
    return @str unless @isFile

    raise "Cannot find #{@cId}" unless File.exist?(@cId)
    raise "Cannot read #{@cId}" unless File.stat(@cId).readable?
    # block form closes the handle even if the read fails
    File.open(@cId, "r") { |f| f.read }
  end

end
@@ -0,0 +1,99 @@
1
+ #
2
+ # CompressionTask
3
+ #
4
+ # task object to handle file compressions, used
5
+ # with TaskMaster
6
+ #
7
+ # $Id: CompressionTask.rb,v 1.3 2003/11/14 14:51:44 cilibrar Exp $
8
+
9
+ require 'CompLearnLib/Task.rb'
10
+ require 'CompLearnLib/CLConfig.rb'
11
+ require 'CompLearnLib/FoundComp.rb'
12
+
13
+ if FoundComp::FOUNDBZIP2
14
+ require 'bz2'
15
+ end
16
+
17
+ if FoundComp::FOUNDGZIP
18
+ require 'zlib'
19
+ end
20
+
21
+ include MTask
22
+
23
#
# Deflate str with zlib at its best-compression level and return
# the compressed bytes as a string.
#
def gzipCompress(str)
  Zlib::Deflate.deflate(str, Zlib::BEST_COMPRESSION)
end
30
+
31
#
# Task that concatenates the values of a list of compressible
# objects, compresses the result with the configured compressor and
# reports the compressed size through reply().
#
class CompressionTask < Task

  # concatenation of the ids (to_s) of all objects in this task
  attr_reader :idTotal

  #
  # cObjects: objects answering to_s (id) and value (data); the list
  #           is cloned so later changes by the caller are not seen
  #
  def initialize(cObjects)
    @cObjects = cObjects.clone
    @idTotal = cObjects.map { |obj| obj.to_s }.join
  end

  #
  # Compress the concatenated values and reply() with the size.
  #
  # The compressor name comes from the default CLConfig and is
  # matched case-insensitively. Raises RuntimeError when the chosen
  # compressor is unavailable, is "none", or is not recognised.
  # The "shell" method replies with -1 if running the command fails.
  #
  def execute
    total = ""
    @cObjects.each { |obj| total << obj.value }
    compmethod = CLConfig.getDefaultConfig().compressor

    case compmethod.downcase

    when "bz2", "bzip2", "bzip", "bz"
      unless FoundComp::FOUNDBZIP2
        fail "Sorry, bzip2 compression is not installed on this system."
      end
      reply(BZ2.compress(total).size)

    when "gzip", "gz"
      unless FoundComp::FOUNDGZIP
        fail "Sorry, gzip compression is not installed on this system."
      end
      reply(gzipCompress(total).size)

    when "shell"
      reply(shellCompressedSize(total))

    when "none"
      fail "Sorry, no valid compressors installed!"

    else
      fail "Sorry, compression method #{compmethod} is not supported"

    end
  end

  private

  #
  # Pipe total through the configured compressorCommand and return
  # the size of what the command prints, or -1 on error.
  #
  def shellCompressedSize(total)
    cmd = CLConfig.getDefaultConfig().compressorCommand
    pipe = nil
    writer = nil
    result = 0
    begin
      pipe = IO.popen(cmd, "w+")
      # Feed the command from a separate thread so that reading its
      # output below cannot deadlock on a full pipe. IO#write sends
      # the whole buffer, which avoids tracking an offset that mixes
      # byte counts with character indexes.
      writer = Thread.new {
        pipe.write(total)
        pipe.close_write
      }
      result = pipe.read.size
    rescue
      puts "Got error: #{$!}"
      result = -1
    ensure
      # never leave the feeder thread or the child pipe behind
      writer.kill if writer && writer.alive?
      pipe.close if pipe && !pipe.closed?
    end
    result
  end
end
99
+
@@ -0,0 +1,18 @@
1
#
# Helpers to read and write a distance matrix stored as text: one
# row per line, values separated by whitespace.
#
class DistMatrix

  #
  # Parse a matrix from a file name (String) or from any object
  # that answers readlines.
  #
  # Lines starting with '#' and lines without any non-blank
  # character are skipped. Every remaining field is converted with
  # to_f. Returns an array of rows (arrays of Float).
  #
  def DistMatrix.readFromFile(f)
    lines = if f.is_a?(String)
      # block form so the handle we open ourselves is closed again
      File.open(f, 'r') { |io| io.readlines }
    else
      f.readlines
    end
    rows = lines.reject { |line| line =~ /^#/ || line !~ /\S+/ }
    rows.collect { |line| line.split(' ').collect { |i| i.to_f } }
  end

  #
  # Render a matrix (array of rows) as text: the values of a row
  # joined by single spaces, each row terminated by a newline.
  #
  def DistMatrix.convertToString(distmatrix)
    result = ''
    distmatrix.each { |row| result << row.join(' ') << "\n" }
    result
  end

end
@@ -0,0 +1,10 @@
1
#
# Records which compressors are available and the package version.
#
class FoundComp
  FOUNDGZIP = true
  FOUNDBZIP2 = true
  VERSION = '0.6.2'

  #
  # Name of the preferred available compressor: bzip2 first, then
  # gzip, otherwise "none".
  #
  def self.defaultCompressor
    if FOUNDBZIP2
      "bzip2"
    elsif FOUNDGZIP
      "gzip"
    else
      "none"
    end
  end
end
@@ -0,0 +1,10 @@
1
+ class FoundComp
2
+ FOUNDGZIP = @FOUNDGZIP@
3
+ FOUNDBZIP2 = @FOUNDBZIP2@
4
+ VERSION = '@VERSION@'
5
+ def FoundComp.defaultCompressor()
6
+ return "bzip2" if FOUNDBZIP2
7
+ return "gzip" if FOUNDGZIP
8
+ return "none"
9
+ end
10
+ end
@@ -0,0 +1,248 @@
1
+ #
2
+ # Ncd calculations
3
+ #
4
+ # $Id: Ncd.rb,v 1.2 2003/10/30 18:03:21 cilibrar Exp $
5
+ #
6
+
7
+ require 'CompLearnLib/CLConfig'
8
+ require 'CompLearnLib/CompressionObject'
9
+ require 'CompLearnLib/CompressionTask'
10
+ require 'CompLearnLib/Task'
11
+ include MTask
12
+
13
#
# Computes the normalized compression distance (NCD) between
# compressible objects, for single pairs, one-against-many vectors
# and full matrices.
#
class Ncd

  #private_class_method :ncd

  #
  # Bind the calculator to the default configuration.
  #
  def initialize
    @config = CLConfig.getDefaultConfig()
    # for now
  end

  ### private

  #
  # get the ncd given pre compressed input
  #
  # ca:  size of compressed a
  # cb:  size of compressed b
  # cab: size of compressed ab
  # cba: size of compressed ba
  #
  # returns: max(cba - cb, cab - ca) / max(ca, cb) as a Float
  #
  def ncd(ca, cb, cab, cba)
    maxk = ca < cb ? cb : ca
    kab = cba - cb
    kba = cab - ca
    maxck = kab < kba ? kba : kab
    maxck.to_f / maxk.to_f
  end

  #
  # execute the list of given compression tasks
  #
  # list: a list of objects to compress of the
  #       form [[object1],[object1,object2],..]
  #
  # returns: a hash of compressed sizes, the key to
  #          the hash is the id (or concatenation of ids)
  #          of the compressible object(s)
  #
  # NOTE: keys are plain concatenations without a separator, so
  # different id lists can map to the same key.
  #
  def compress(list)

    # dispatch compression tasks
    results = {}
    todo = list.size
    TaskMaster.init
    list.each { |item|
      TaskMaster.enqueue(CompressionTask.new(item)) { |res, task, _src|
        # XXX put in resv chr
        results[task.idTotal] = res
        todo -= 1
      }
    }

    # harvest results; each reply runs one callback above
    TaskMaster.waitForReply while todo != 0

    # add vice versa compression if symmetric
    if @config.isSymmetric?
      list.each { |item|
        next unless item.size > 1
        results[item.reverse.join] = results[item.join]
      }
    end

    results
  end

  #
  # build singles and pairs to compress from the
  # given list
  #
  # For every item this yields [item] followed by [item, other] for
  # each element of the list; in non-symmetric mode [other, item]
  # is added after each pair as well.
  #
  def buildPairs(list)
    cList = []
    list.each { |item|
      cList << [item]
      list.each { |item2|
        cList << [item, item2]
        cList << [item2, item] unless @config.isSymmetric?
      }
    }
    cList
  end

  #
  # build singles and pairs with the given object
  #
  # For every item this yields [item] and [a, item]; in
  # non-symmetric mode [item, a] is added as well.
  #
  def buildSinglePairs(a, list)
    cList = []
    list.each { |item|
      cList << [item] << [a, item]
      cList << [item, a] unless @config.isSymmetric?
    }
    cList
  end

  ### single pair ncd

  #
  # calculate ncd from two cObjects
  #
  def ncdSingle(a, b)
    jobs = [[a], [b], [a, b]]
    symmetric = @config.isSymmetric?
    jobs << [b, a] unless symmetric
    c = compress(jobs)
    # in symmetric mode the reversed order reuses the ab size
    c["#{b}#{a}"] = c["#{a}#{b}"] if symmetric
    ncdFromSizes(c, a, b)
  end

  #
  # calculate ncd from 2 strings
  #
  def ncdString(a, b)
    ncdSingle(CompressionObject.fromString(a, a),
              CompressionObject.fromString(b, b))
  end

  #
  # calculate ncd from 2 files
  #
  def ncdFile(a, b)
    ncdSingle(CompressionObject.fromFile(a),
              CompressionObject.fromFile(b))
  end

  ### vector ncd

  #
  # calculate the ncd between one object and a list
  # of others
  #
  # returns: an array with one ncd per list element, in list order
  #
  def ncdVector(a, list)

    # precompress everything necessary
    cList = buildSinglePairs(a, list)
    cList.push([a])
    c = compress(cList)

    # calculate ncd of a with everything else
    list.collect { |b| ncdFromSizes(c, a, b) }
  end

  #
  # calculate the mutual ncd of a string with a list of strings
  #
  # Each string is used as its own id, as in ncdString.
  #
  def ncdVectorString(a, list)
    aObj = CompressionObject.fromString(a, a)
    listObj = list.collect { |item| CompressionObject.fromString(item, item) }
    ncdVector(aObj, listObj)
  end

  #
  # calculate the mutual ncd of a file with a list of files
  #
  def ncdVectorFile(a, list)
    aObj = CompressionObject.fromFile(a)
    listObj = list.collect { |item| CompressionObject.fromFile(item) }
    ncdVector(aObj, listObj)
  end


  ### matrix ncd

  #
  # calculate the mutual ncd of all lists objects
  #
  # returns: an array of rows; row i, column j holds the ncd of
  #          list[i] and list[j]
  #
  def ncdMatrix(list)
    # precompress everything necessary
    c = compress(buildPairs(list))

    # calculate ncd of every object with every object
    list.collect { |a|
      list.collect { |b| ncdFromSizes(c, a, b) }
    }
  end

  #
  # calculate the mutual ncd of a list of strings
  #
  # Each string is used as its own id, as in ncdString.
  #
  def ncdMatrixString(list) # most caching benefit
    ncdMatrix(list.collect { |item| CompressionObject.fromString(item, item) })
  end

  #
  # calculate the mutual ncd of a list of files
  #
  def ncdMatrixFile(list)
    ncdMatrix(list.collect { |item| CompressionObject.fromFile(item) })
  end

  private

  #
  # Look up the four sizes for a and b in the hash returned by
  # compress and turn them into an ncd value.
  #
  def ncdFromSizes(c, a, b)
    ncd(c["#{a}"], c["#{b}"], c["#{a}#{b}"], c["#{b}#{a}"])
  end
end
247
+
248
+