complearn 0.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS +13 -0
- data/COPYING +340 -0
- data/ChangeLog +0 -0
- data/INSTALL +231 -0
- data/Makefile +352 -0
- data/Makefile.am +76 -0
- data/Makefile.in +352 -0
- data/NEWS +7 -0
- data/README +0 -0
- data/aclocal.m4 +104 -0
- data/bin/Makefile +209 -0
- data/bin/Makefile.am +8 -0
- data/bin/Makefile.in +209 -0
- data/bin/labeltree +68 -0
- data/bin/labeltree.in +68 -0
- data/bin/makesvm +70 -0
- data/bin/makesvm.in +70 -0
- data/bin/maketree +98 -0
- data/bin/maketree.in +98 -0
- data/bin/ncd +43 -0
- data/bin/ncd.in +43 -0
- data/bin/ncdmatrix +54 -0
- data/bin/ncdmatrix.in +54 -0
- data/bin/ncdvector +50 -0
- data/bin/ncdvector.in +50 -0
- data/complearn-0.6.2.gem +0 -0
- data/complearn.gemspec +57 -0
- data/config.log +597 -0
- data/config.status +1082 -0
- data/configure +4922 -0
- data/configure.ac +91 -0
- data/confstat5FpLBf/config.h +65 -0
- data/confstat5FpLBf/subs-1.sed +50 -0
- data/confstat5FpLBf/subs-2.sed +13 -0
- data/confstat5FpLBf/subs.frag +0 -0
- data/confstat5FpLBf/subs.sed +59 -0
- data/confstat5FpLBf/undefs.sed +24 -0
- data/doc/FAQ.txt +67 -0
- data/doc/Makefile +286 -0
- data/doc/Makefile.am +11 -0
- data/doc/Makefile.in +286 -0
- data/doc/devguide.txt +15 -0
- data/doc/example.complearnrc +14 -0
- data/doc/examples.txt +35 -0
- data/doc/man/Makefile +255 -0
- data/doc/man/Makefile.am +11 -0
- data/doc/man/Makefile.in +255 -0
- data/doc/man/complearn.5 +91 -0
- data/doc/man/labeltree.1 +35 -0
- data/doc/man/makesvm.1 +60 -0
- data/doc/man/maketree.1 +58 -0
- data/doc/man/ncd.1 +51 -0
- data/doc/man/ncdmatrix.1 +40 -0
- data/doc/man/ncdvector.1 +42 -0
- data/doc/readme.txt +101 -0
- data/doc/userguide.txt +46 -0
- data/examples/genes/blueWhale.txt +1 -0
- data/examples/genes/cat.txt +1 -0
- data/examples/genes/chimpanzee.txt +1 -0
- data/examples/genes/finWhale.txt +1 -0
- data/examples/genes/graySeal.txt +1 -0
- data/examples/genes/harborSeal.txt +1 -0
- data/examples/genes/horse.txt +1 -0
- data/examples/genes/human.txt +1 -0
- data/examples/genes/mouse.txt +1 -0
- data/examples/genes/rat.txt +1 -0
- data/ext/Makefile +167 -0
- data/ext/Quartet.c +399 -0
- data/ext/Quartet.h +62 -0
- data/ext/TreeScore.c +244 -0
- data/ext/TreeScore.h +3 -0
- data/ext/config.h +65 -0
- data/ext/config.h.in +64 -0
- data/ext/extconf.rb +3 -0
- data/ext/lib/CompLearnLib/CLConfig.rb +241 -0
- data/ext/lib/CompLearnLib/CompressionObject.rb +59 -0
- data/ext/lib/CompLearnLib/CompressionTask.rb +99 -0
- data/ext/lib/CompLearnLib/DistMatrix.rb +18 -0
- data/ext/lib/CompLearnLib/FoundComp.rb +10 -0
- data/ext/lib/CompLearnLib/FoundComp.rb.in +10 -0
- data/ext/lib/CompLearnLib/Ncd.rb +248 -0
- data/ext/lib/CompLearnLib/RunEnv.rb +150 -0
- data/ext/lib/CompLearnLib/Task.rb +39 -0
- data/ext/lib/CompLearnLib/TaskMaster.rb +13 -0
- data/ext/lib/CompLearnLib/TaskMasterMPI.rb +112 -0
- data/ext/lib/CompLearnLib/TaskMasterSingle.rb +39 -0
- data/ext/lib/CompLearnLib/Tree.rb +300 -0
- data/install-sh +294 -0
- data/missing +336 -0
- data/mkinstalldirs +111 -0
- data/o +24 -0
- data/scripts/CompLearn.iss +89 -0
- data/scripts/CompLearn.iss.in +89 -0
- data/scripts/debian/changelog +6 -0
- data/scripts/debian/control +14 -0
- data/scripts/makeSetup.sh +23 -0
- data/scripts/makeSetup.sh.in +23 -0
- data/scripts/makedeb.zsh +46 -0
- data/scripts/makedeb.zsh.in +46 -0
- data/tests/alltests.rb +2 -0
- data/tests/bz2test.rb +516 -0
- data/tests/sshagent-test.rb +48 -0
- data/tests/tests.rb +275 -0
- metadata +164 -0
@@ -0,0 +1,59 @@
|
|
1
|
+
#
|
2
|
+
# CompressionObject
|
3
|
+
#
|
4
|
+
# This is a compressible object. Abstracts string/files
|
5
|
+
#
|
6
|
+
# $Id: CompressionObject.rb,v 1.1 2003/10/23 17:03:59 cilibrar Exp $
|
7
|
+
#
|
8
|
+
|
9
|
+
class CompressionObject

  # Instances are only created through fromFile / fromString.
  private_class_method :new

  #
  # private constructor
  #
  # str::    in-memory content (only returned by #value when isFile is false)
  # id::     identifier returned by #to_s; for file objects this is the path
  # isFile:: true when #value must read the content from the file named by id
  #
  def initialize(str,id,isFile)
    @cId = id
    @str = str
    @isFile = isFile
  end

  #
  # Initialize from file
  #
  # fname:: path of the file; it doubles as the object's id.
  # The file is not touched here, only when #value is called.
  #
  def self.fromFile(fname)
    new('',fname,true)
  end

  #
  # Initialize from string
  #
  # str:: the content itself
  # id::  identifier reported by #to_s
  #
  def self.fromString(str,id)
    new(str,id,false)
  end

  #
  # id is to_s
  #
  def to_s()
    @cId
  end

  #
  # return the content: the stored string, or the file's contents when
  # this object was built with fromFile.
  #
  # raises RuntimeError ("Cannot find ..." / "Cannot read ...") when the
  # backing file is missing or not readable.
  #
  def value()
    return(@str) unless @isFile

    raise "Cannot find #{@cId}" unless File.exist?(@cId)
    raise "Cannot read #{@cId}" unless File.stat(@cId).readable?
    # Block form closes the handle as soon as the content is read, so
    # repeated calls do not accumulate open file descriptors.
    File.open(@cId,"r") { |f| f.read }
  end

end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
#
|
2
|
+
# CompressionTask
|
3
|
+
#
|
4
|
+
# task object to handle file compressions, used
|
5
|
+
# with TaskMaster
|
6
|
+
#
|
7
|
+
# $Id: CompressionTask.rb,v 1.3 2003/11/14 14:51:44 cilibrar Exp $
|
8
|
+
|
9
|
+
require 'CompLearnLib/Task.rb'
|
10
|
+
require 'CompLearnLib/CLConfig.rb'
|
11
|
+
require 'CompLearnLib/FoundComp.rb'
|
12
|
+
|
13
|
+
if FoundComp::FOUNDBZIP2
|
14
|
+
require 'bz2'
|
15
|
+
end
|
16
|
+
|
17
|
+
if FoundComp::FOUNDGZIP
|
18
|
+
require 'zlib'
|
19
|
+
end
|
20
|
+
|
21
|
+
include MTask
|
22
|
+
|
23
|
+
#
# Deflate str at zlib's strongest compression level and return the
# compressed bytes.
#
# Note: Zlib::Deflate emits a zlib stream, not a gzip file container.
#
def gzipCompress(str)
  z = Zlib::Deflate.new(Zlib::BEST_COMPRESSION)
  begin
    z.deflate(str, Zlib::FINISH)
  ensure
    # Always release the deflate stream, even when deflate raises.
    z.close
  end
end
|
30
|
+
|
31
|
+
#
# Task that concatenates the values of a list of compressible objects,
# compresses the result with the configured compressor and replies with
# the compressed size.
#
class CompressionTask < Task

  # Concatenation of the ids (to_s) of all objects in this task.
  attr_reader :idTotal

  #
  # cObjects:: list of objects answering to_s (id) and value (content)
  #
  def initialize(cObjects)
    @cObjects = cObjects.clone
    @idTotal = ""
    cObjects.each { |obj|
      @idTotal << obj.to_s
    }
  end

  #
  # Concatenate all object values, compress them with the compressor
  # named in the default CLConfig and hand the compressed size to reply.
  # (reply is not defined in this class; presumably inherited from Task.)
  #
  # For the "shell" compressor a failure is reported as size -1; every
  # other unusable compressor raises a RuntimeError.
  #
  def execute()
    total = ""
    @cObjects.each { |obj|
      total << obj.value
    }
    compmethod = CLConfig.getDefaultConfig().compressor

    case compmethod.downcase

    when "bz2", "bzip2", "bzip", "bz"

      if FoundComp::FOUNDBZIP2
        reply(BZ2.compress(total).size)
      else
        fail "Sorry, bzip2 compression is not installed on this system."
      end

    when "gzip", "gz"
      if FoundComp::FOUNDGZIP
        reply(gzipCompress(total).size)
      else
        fail "Sorry, gzip compression is not installed on this system."
      end

    when "shell"
      reply(shellCompressedSize(total))

    when "none"
      fail "Sorry, no valid compressors installed!"

    else
      fail "Sorry, compression method #{compmethod} is not supported"

    end

  end

  private

  #
  # Pipe total through the configured compressor command and return the
  # number of bytes it produced, or -1 if anything goes wrong.
  #
  def shellCompressedSize(total)
    cmd = CLConfig.getDefaultConfig().compressorCommand
    f = nil
    writer = nil
    begin
      f = IO.popen(cmd, "w+")
      # Binary mode: the payload is written byte-for-byte and the size of
      # what is read back is a byte count, not a character count.
      f.binmode
      # Feed the command from a separate thread so that reading its
      # output below cannot deadlock against a full pipe.
      writer = Thread.new() {
        # IO#write sends the whole string; no manual chunking is needed
        # (the old character-indexed chunk loop advanced by byte counts).
        f.write(total)
        f.close_write
      }
      result = f.read.size
      # Surface writer failures (e.g. the command exiting early) instead
      # of reporting a size for partially delivered input.
      writer.join
      result
    rescue
      puts "Got error: #{$!}"
      -1
    ensure
      writer.kill if writer && writer.alive?
      f.close if f && !f.closed?
    end
  end

end
|
99
|
+
|
@@ -0,0 +1,18 @@
|
|
1
|
+
#
# Helpers to read and write distance matrices as whitespace-separated
# text, one matrix row per line.
#
class DistMatrix

  #
  # Parse a distance matrix.
  #
  # f:: either a path (String) or an object answering readlines.
  #
  # Lines starting with '#' and lines without any non-blank character
  # are skipped. Returns an array of rows, each an array of Floats
  # (fields are converted with to_f).
  #
  def DistMatrix.readFromFile(f)
    lines =
      if f.is_a?(String)
        # We opened the file ourselves, so make sure it is closed again.
        File.open(f, 'r') { |io| io.readlines }
      else
        # Caller-supplied stream: read it but leave it open for the caller.
        f.readlines
      end
    lines.delete_if { |line| line =~ /^#/ || line !~ /\S+/ }
    lines.collect { |line| line.split(' ').collect { |i| i.to_f } }
  end

  #
  # Render a matrix (array of rows) as text: the fields of a row are
  # joined with single spaces and every row ends with a newline.
  #
  def DistMatrix.convertToString(distmatrix)
    result = ''
    distmatrix.each { |row|
      result << row.join(' ') << "\n"
    }
    result
  end

end
|
@@ -0,0 +1,248 @@
|
|
1
|
+
#
|
2
|
+
# Ncd calculations
|
3
|
+
#
|
4
|
+
# $Id: Ncd.rb,v 1.2 2003/10/30 18:03:21 cilibrar Exp $
|
5
|
+
#
|
6
|
+
|
7
|
+
require 'CompLearnLib/CLConfig'
|
8
|
+
require 'CompLearnLib/CompressionObject'
|
9
|
+
require 'CompLearnLib/CompressionTask'
|
10
|
+
require 'CompLearnLib/Task'
|
11
|
+
include MTask
|
12
|
+
|
13
|
+
#
# Normalized compression distance (NCD) calculations for single pairs,
# one-against-many vectors and full matrices of compressible objects.
#
class Ncd

  #private_class_method :ncd

  #
  # Capture the default configuration; its isSymmetric? flag decides
  # whether both concatenation orders of a pair are compressed.
  #
  def initialize()
    @config = CLConfig.getDefaultConfig()
    # for now
  end

  ### private

  #
  # get the ncd given pre compressed input
  #
  # ca:  size of compressed a
  # cb:  size of compressed b
  # cab: size of compressed ab
  # cba: size of compressed ba
  #
  # returns: ncd of a and b as a Float, i.e.
  #          max(cba - cb, cab - ca) / max(ca, cb)
  #
  def ncd(ca,cb,cab,cba)
    maxk = ca < cb ? cb : ca
    kab = cba - cb
    kba = cab - ca
    maxck = kab < kba ? kba : kab
    maxck.to_f / maxk.to_f
  end

  #
  # execute the list of given compression tasks
  #
  # list: a list of objects to compress of the
  #       form [[object1],[object1,object2],..]
  #
  # returns: a hash of compressed sizes, the key to
  #          the hash is the id (or concatenation of ids)
  #          of the compressible object(s)
  #
  def compress(list)

    # dispatch compression tasks
    results = { }
    todo = list.size
    TaskMaster.init
    list.each { |item|
      task = CompressionTask.new(item)
      TaskMaster.enqueue(task) { |res, finished, _src|
        # XXX ids are concatenated without a reserved separator, so
        # distinct items can map to the same key (e.g. "a"+"b" vs "ab").
        results[finished.idTotal] = res
        todo -= 1
      }
    }

    # harvest results: every completed task decrements todo in the
    # callback above
    TaskMaster.waitForReply while todo != 0

    # add vice versa compression if symmetric
    if @config.isSymmetric?
      list.each { |item|
        next unless item.size > 1
        results[item.reverse.join] = results[item.join]
      }
    end

    results
  end

  #
  # build singles and pairs to compress from the
  # given list
  #
  # For every item this yields [item] followed by [item,other] for each
  # element of the list; when the configuration is not symmetric the
  # reversed pair [other,item] is added as well.
  #
  def buildPairs(list)
    symmetric = @config.isSymmetric?
    cList = [ ]
    list.each { |item|
      cList.push([item])
      list.each { |item2|
        cList.push([item,item2])
        cList.push([item2,item]) unless symmetric
      }
    }
    cList
  end

  #
  # build singles and pairs with the given object
  #
  # For every item this yields [item] and [a,item]; when the
  # configuration is not symmetric [item,a] is added as well.
  # Note that [a] itself is not part of the result.
  #
  def buildSinglePairs(a,list)
    symmetric = @config.isSymmetric?
    cList = [ ]
    list.each { |item|
      cList.push([item])
      cList.push([a,item])
      cList.push([item,a]) unless symmetric
    }
    cList
  end

  ### single pair ncd

  #
  # calculate ncd from two cObjects
  #
  def ncdSingle(a,b)
    if @config.isSymmetric?
      c = compress([[a],[b],[a,b]])
      c["#{b}#{a}"] = c["#{a}#{b}"]
    else
      c = compress([[a],[b],[a,b],[b,a]])
    end
    pairNcd(c,a,b)
  end

  #
  # calculate ncd from 2 strings
  # (each string also serves as the id of its compression object)
  #
  def ncdString(a, b)
    aObj = CompressionObject.fromString(a, a)
    bObj = CompressionObject.fromString(b, b)
    ncdSingle(aObj,bObj)
  end

  #
  # calculate ncd from 2 files
  #
  def ncdFile(a, b)
    aObj = CompressionObject.fromFile(a)
    bObj = CompressionObject.fromFile(b)
    ncdSingle(aObj,bObj)
  end

  ### vector ncd

  #
  # calculate the ncd between one object and a list
  # of others
  #
  # returns: an array with one ncd per list element, in list order
  #
  def ncdVector(a,list)

    # precompress everything necessary
    cList = buildSinglePairs(a,list)
    cList.push([a])
    c = compress(cList)

    # calculate ncd of a with everything else
    list.collect { |b| pairNcd(c,a,b) }
  end

  #
  # calculate the mutual ncd of a string with a list of strings
  #
  def ncdVectorString(a, list)
    # fromString needs (str, id); use the string as its own id, exactly
    # as ncdString does.
    aObj = CompressionObject.fromString(a, a)
    listObj = list.collect { |item|
      CompressionObject.fromString(item, item)
    }
    ncdVector(aObj,listObj)
  end

  #
  # calculate the mutual ncd of a file with a list of files
  #
  def ncdVectorFile(a, list)
    aObj = CompressionObject.fromFile(a)
    listObj = list.collect { |item|
      CompressionObject.fromFile(item)
    }
    ncdVector(aObj,listObj)
  end


  ### matrix ncd

  #
  # calculate the mutual ncd of all lists objects
  #
  # returns: an array of rows; row i, column j holds the ncd of
  #          list[i] and list[j]
  #
  def ncdMatrix(list)
    # precompress everything necessary
    cList = buildPairs(list)
    c = compress(cList)

    # calculate ncd of every object with every object
    list.collect { |a|
      list.collect { |b| pairNcd(c,a,b) }
    }
  end

  #
  # calculate the mutual ncd of a list of strings
  #
  def ncdMatrixString(list) # most caching benefit
    # fromString needs (str, id); use the string as its own id, exactly
    # as ncdString does.
    listObj = list.collect { |item|
      CompressionObject.fromString(item, item)
    }
    ncdMatrix(listObj)
  end

  #
  # calculate the mutual ncd of a list of files
  #
  def ncdMatrixFile(list)
    listObj = list.collect { |item|
      CompressionObject.fromFile(item)
    }
    ncdMatrix(listObj)
  end

  private

  #
  # Look up the four compressed sizes of a, b, ab and ba in the result
  # hash c (as returned by compress) and turn them into an ncd.
  #
  def pairNcd(c,a,b)
    ncd(c["#{a}"],c["#{b}"],c["#{a}#{b}"],c["#{b}#{a}"])
  end
end
|
247
|
+
|
248
|
+
|