complearn 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. data/AUTHORS +13 -0
  2. data/COPYING +340 -0
  3. data/ChangeLog +0 -0
  4. data/INSTALL +231 -0
  5. data/Makefile +352 -0
  6. data/Makefile.am +76 -0
  7. data/Makefile.in +352 -0
  8. data/NEWS +7 -0
  9. data/README +0 -0
  10. data/aclocal.m4 +104 -0
  11. data/bin/Makefile +209 -0
  12. data/bin/Makefile.am +8 -0
  13. data/bin/Makefile.in +209 -0
  14. data/bin/labeltree +68 -0
  15. data/bin/labeltree.in +68 -0
  16. data/bin/makesvm +70 -0
  17. data/bin/makesvm.in +70 -0
  18. data/bin/maketree +98 -0
  19. data/bin/maketree.in +98 -0
  20. data/bin/ncd +43 -0
  21. data/bin/ncd.in +43 -0
  22. data/bin/ncdmatrix +54 -0
  23. data/bin/ncdmatrix.in +54 -0
  24. data/bin/ncdvector +50 -0
  25. data/bin/ncdvector.in +50 -0
  26. data/complearn-0.6.2.gem +0 -0
  27. data/complearn.gemspec +57 -0
  28. data/config.log +597 -0
  29. data/config.status +1082 -0
  30. data/configure +4922 -0
  31. data/configure.ac +91 -0
  32. data/confstat5FpLBf/config.h +65 -0
  33. data/confstat5FpLBf/subs-1.sed +50 -0
  34. data/confstat5FpLBf/subs-2.sed +13 -0
  35. data/confstat5FpLBf/subs.frag +0 -0
  36. data/confstat5FpLBf/subs.sed +59 -0
  37. data/confstat5FpLBf/undefs.sed +24 -0
  38. data/doc/FAQ.txt +67 -0
  39. data/doc/Makefile +286 -0
  40. data/doc/Makefile.am +11 -0
  41. data/doc/Makefile.in +286 -0
  42. data/doc/devguide.txt +15 -0
  43. data/doc/example.complearnrc +14 -0
  44. data/doc/examples.txt +35 -0
  45. data/doc/man/Makefile +255 -0
  46. data/doc/man/Makefile.am +11 -0
  47. data/doc/man/Makefile.in +255 -0
  48. data/doc/man/complearn.5 +91 -0
  49. data/doc/man/labeltree.1 +35 -0
  50. data/doc/man/makesvm.1 +60 -0
  51. data/doc/man/maketree.1 +58 -0
  52. data/doc/man/ncd.1 +51 -0
  53. data/doc/man/ncdmatrix.1 +40 -0
  54. data/doc/man/ncdvector.1 +42 -0
  55. data/doc/readme.txt +101 -0
  56. data/doc/userguide.txt +46 -0
  57. data/examples/genes/blueWhale.txt +1 -0
  58. data/examples/genes/cat.txt +1 -0
  59. data/examples/genes/chimpanzee.txt +1 -0
  60. data/examples/genes/finWhale.txt +1 -0
  61. data/examples/genes/graySeal.txt +1 -0
  62. data/examples/genes/harborSeal.txt +1 -0
  63. data/examples/genes/horse.txt +1 -0
  64. data/examples/genes/human.txt +1 -0
  65. data/examples/genes/mouse.txt +1 -0
  66. data/examples/genes/rat.txt +1 -0
  67. data/ext/Makefile +167 -0
  68. data/ext/Quartet.c +399 -0
  69. data/ext/Quartet.h +62 -0
  70. data/ext/TreeScore.c +244 -0
  71. data/ext/TreeScore.h +3 -0
  72. data/ext/config.h +65 -0
  73. data/ext/config.h.in +64 -0
  74. data/ext/extconf.rb +3 -0
  75. data/ext/lib/CompLearnLib/CLConfig.rb +241 -0
  76. data/ext/lib/CompLearnLib/CompressionObject.rb +59 -0
  77. data/ext/lib/CompLearnLib/CompressionTask.rb +99 -0
  78. data/ext/lib/CompLearnLib/DistMatrix.rb +18 -0
  79. data/ext/lib/CompLearnLib/FoundComp.rb +10 -0
  80. data/ext/lib/CompLearnLib/FoundComp.rb.in +10 -0
  81. data/ext/lib/CompLearnLib/Ncd.rb +248 -0
  82. data/ext/lib/CompLearnLib/RunEnv.rb +150 -0
  83. data/ext/lib/CompLearnLib/Task.rb +39 -0
  84. data/ext/lib/CompLearnLib/TaskMaster.rb +13 -0
  85. data/ext/lib/CompLearnLib/TaskMasterMPI.rb +112 -0
  86. data/ext/lib/CompLearnLib/TaskMasterSingle.rb +39 -0
  87. data/ext/lib/CompLearnLib/Tree.rb +300 -0
  88. data/install-sh +294 -0
  89. data/missing +336 -0
  90. data/mkinstalldirs +111 -0
  91. data/o +24 -0
  92. data/scripts/CompLearn.iss +89 -0
  93. data/scripts/CompLearn.iss.in +89 -0
  94. data/scripts/debian/changelog +6 -0
  95. data/scripts/debian/control +14 -0
  96. data/scripts/makeSetup.sh +23 -0
  97. data/scripts/makeSetup.sh.in +23 -0
  98. data/scripts/makedeb.zsh +46 -0
  99. data/scripts/makedeb.zsh.in +46 -0
  100. data/tests/alltests.rb +2 -0
  101. data/tests/bz2test.rb +516 -0
  102. data/tests/sshagent-test.rb +48 -0
  103. data/tests/tests.rb +275 -0
  104. metadata +164 -0
@@ -0,0 +1,59 @@
1
+ #
2
+ # CompressionObject
3
+ #
4
+ # This is a compressible object. Abstracts string/files
5
+ #
6
+ # $Id: CompressionObject.rb,v 1.1 2003/10/23 17:03:59 cilibrar Exp $
7
+ #
8
+
9
#
# CompressionObject
#
# A compressible item backed either by an in-memory string or by a file
# on disk.  Instances are only created through the fromFile and
# fromString factories; the constructor itself is private.
#
class CompressionObject

  private_class_method :new

  #
  # private constructor
  #
  # str:    the payload, only consulted when isFile is false
  # id:     the identifier returned by to_s; when isFile is true this is
  #         also the path that value reads from
  # isFile: true when the payload lives in the file named by id
  #
  def initialize(str, id, isFile)
    @cId = id
    @str = str
    @isFile = isFile
  end

  #
  # Initialize from file
  #
  # fname: path of the file; it doubles as the object's id.  The file is
  #        not touched here, only when value is called.
  #
  def self.fromFile(fname)
    new('', fname, true)
  end

  #
  # Initialize from string
  #
  # str: the payload
  # id:  the identifier returned by to_s
  #
  def self.fromString(str, id)
    new(str, id, false)
  end

  #
  # id is to_s
  #
  def to_s()
    @cId
  end

  #
  # return the value depending on whether it's a file or
  # string
  #
  # returns: the stored string, or the current content of the file
  # raises:  RuntimeError when the file does not exist or is not readable
  #
  def value()
    return @str unless @isFile

    raise "Cannot find #{@cId}" unless File.exist?(@cId)
    raise "Cannot read #{@cId}" unless File.stat(@cId).readable?
    # Block form so the handle is closed as soon as the read finishes
    # instead of lingering until the garbage collector gets to it.
    File.open(@cId, "r") { |f| f.read }
  end

end
@@ -0,0 +1,99 @@
1
+ #
2
+ # CompressionTask
3
+ #
4
+ # task object to handle file compressions, used
5
+ # with TaskMaster
6
+ #
7
+ # $Id: CompressionTask.rb,v 1.3 2003/11/14 14:51:44 cilibrar Exp $
8
+
9
+ require 'CompLearnLib/Task.rb'
10
+ require 'CompLearnLib/CLConfig.rb'
11
+ require 'CompLearnLib/FoundComp.rb'
12
+
13
+ if FoundComp::FOUNDBZIP2
14
+ require 'bz2'
15
+ end
16
+
17
+ if FoundComp::FOUNDGZIP
18
+ require 'zlib'
19
+ end
20
+
21
+ include MTask
22
+
23
#
# Compress str with zlib at the highest compression level.
#
# Note that this uses Zlib::Deflate, so the result is a zlib (deflate)
# stream rather than a gzip container.
#
# str: the data to compress
#
# returns: the compressed data as a String
#
def gzipCompress(str)
  # The one-shot class method closes its internal stream even when
  # deflating raises, which the manual new/deflate/close sequence did not.
  Zlib::Deflate.deflate(str, Zlib::BEST_COMPRESSION)
end
30
+
31
#
# CompressionTask
#
# A task that concatenates the values of a list of compressible objects,
# compresses the result with the configured compressor and reports the
# compressed size (in bytes) through reply.
#
class CompressionTask < Task

  # concatenation of the to_s ids of all objects handed to the task
  attr_reader :idTotal

  #
  # cObjects: list of objects responding to to_s (the id) and value (the
  #           data to compress)
  #
  def initialize(cObjects)
    @cObjects = cObjects.clone
    @idTotal = ""
    @cObjects.each { |obj|
      @idTotal << obj.to_s
    }
  end

  #
  # Compress the concatenated values and pass the compressed size to
  # reply (not defined in this class; presumably provided by Task or
  # MTask -- confirm there).
  #
  # The compressor is chosen by CLConfig.getDefaultConfig().compressor.
  # For the "shell" method -1 is reported when running the external
  # command fails.
  #
  # raises: RuntimeError (via fail) when the chosen method is unknown,
  #         is "none", or its library was not found
  #
  def execute()
    total = ""
    @cObjects.each { |obj|
      total << obj.value
    }
    compmethod = CLConfig.getDefaultConfig().compressor

    case compmethod.downcase

    when "bz2", "bzip2", "bzip", "bz"
      unless FoundComp::FOUNDBZIP2
        fail "Sorry, bzip2 compression is not installed on this system."
      end
      reply(byteLength(BZ2.compress(total)))

    when "gzip", "gz"
      unless FoundComp::FOUNDGZIP
        fail "Sorry, gzip compression is not installed on this system."
      end
      reply(byteLength(gzipCompress(total)))

    when "shell"
      cmd = CLConfig.getDefaultConfig().compressorCommand
      reply(shellCompressedSize(total, cmd))

    when "none"
      fail "Sorry, no valid compressors installed!"

    else
      fail "Sorry, compression method #{compmethod} is not supported"

    end
  end

  private

  #
  # Length of str in bytes.  String#size counts characters on Ruby 1.9
  # and later, which under-reports compressed (binary) data whenever the
  # string carries a multibyte encoding.
  #
  def byteLength(str)
    str.respond_to?(:bytesize) ? str.bytesize : str.size
  end

  #
  # Pipe total through the external command cmd and return the number of
  # bytes the command wrote to its standard output, or -1 on any error.
  #
  def shellCompressedSize(total, cmd)
    f = nil
    begin
      f = IO.popen(cmd, "w+")
      # treat the pipe as raw bytes in both directions
      f.binmode
      # Feed the command from a separate thread so that reading its
      # output below cannot deadlock on a full pipe buffer.
      writer = Thread.new() {
        f.write(total)
        f.close_write
      }
      result = byteLength(f.read)
      # join so that a failed write surfaces here instead of being lost
      writer.join
      result
    rescue
      puts "Got error: #{$!}"
      -1
    ensure
      f.close if f && !f.closed?
    end
  end
end
99
+
@@ -0,0 +1,18 @@
1
#
# DistMatrix
#
# Helpers to read a whitespace-separated numeric matrix from a file and
# to turn such a matrix back into text.
#
class DistMatrix

  #
  # Read a matrix, one row per line.
  #
  # f: either a path (String), which is opened and closed here, or an
  #    object responding to readlines (e.g. an open IO), which is read
  #    but left open for the caller
  #
  # Lines starting with '#' and lines without any non-blank character
  # are skipped.  Every remaining field is converted with to_f.
  #
  # returns: an Array of rows, each an Array of Floats
  #
  def DistMatrix.readFromFile(f)
    lines = if f.is_a?(String)
      # block form closes the handle we opened ourselves
      File.open(f, 'r') { |io| io.readlines }
    else
      f.readlines
    end
    lines.delete_if { |line| line =~ /^#/ || line !~ /\S+/ }
    lines.collect { |line| line.split(' ').collect { |i| i.to_f } }
  end

  #
  # Render a matrix as text: the fields of a row are joined by a single
  # space and every row is terminated by a newline.
  #
  # distmatrix: an Array of rows (Arrays)
  #
  # returns: the text as a String ('' for an empty matrix)
  #
  def DistMatrix.convertToString(distmatrix)
    result = ''
    distmatrix.each { |row|
      result << row.join(' ') << "\n"
    }
    result
  end

end
@@ -0,0 +1,10 @@
1
#
# FoundComp
#
# Records which compression libraries are available and the package
# version.  NOTE(review): this file appears to be generated from
# FoundComp.rb.in; keep the two in sync.
#
class FoundComp
  FOUNDGZIP = true
  FOUNDBZIP2 = true
  # frozen so the shared constant cannot be modified in place
  VERSION = '0.6.2'.freeze

  #
  # returns: the name of the preferred available compressor: "bzip2",
  #          else "gzip", else "none"
  #
  def FoundComp.defaultCompressor()
    return "bzip2" if FOUNDBZIP2
    return "gzip" if FOUNDGZIP
    "none"
  end
end
@@ -0,0 +1,10 @@
1
+ class FoundComp
2
+ FOUNDGZIP = @FOUNDGZIP@
3
+ FOUNDBZIP2 = @FOUNDBZIP2@
4
+ VERSION = '@VERSION@'
5
+ def FoundComp.defaultCompressor()
6
+ return "bzip2" if FOUNDBZIP2
7
+ return "gzip" if FOUNDGZIP
8
+ return "none"
9
+ end
10
+ end
@@ -0,0 +1,248 @@
1
+ #
2
+ # Ncd calculations
3
+ #
4
+ # $Id: Ncd.rb,v 1.2 2003/10/30 18:03:21 cilibrar Exp $
5
+ #
6
+
7
+ require 'CompLearnLib/CLConfig'
8
+ require 'CompLearnLib/CompressionObject'
9
+ require 'CompLearnLib/CompressionTask'
10
+ require 'CompLearnLib/Task'
11
+ include MTask
12
+
13
#
# Ncd
#
# Computes distances between compressible objects from their compressed
# sizes.  The compression work is wrapped in CompressionTask objects and
# dispatched through TaskMaster; the formula itself lives in ncd.
#
class Ncd

  #private_class_method :ncd

  #
  # Captures the default configuration; only isSymmetric? is consulted
  # by this class.
  #
  def initialize()
    @config = CLConfig.getDefaultConfig()
    # for now
  end

  ### private

  #
  # get the ncd given pre compressed input
  #
  # ca: size of compressed a
  # cb: size of compressed b
  # cab: size of compressed ab
  # cba: size of compressed ba
  #
  # returns: ncd of a and b as a Float, i.e.
  #          max(cab - ca, cba - cb) / max(ca, cb)
  #
  def ncd(ca, cb, cab, cba)
    maxk = ca < cb ? cb : ca
    kab = cba - cb
    kba = cab - ca
    maxck = kab < kba ? kba : kab
    maxck.to_f / maxk.to_f
  end

  #
  # execute the list of given compression tasks
  #
  # list: a list of objects to compress of the
  #       form [[object1],[object1,object2],..]
  #
  # returns: a hash of compressed sizes, the key to
  #          the hash is the id (or concatenation of ids)
  #          of the compressible object(s)
  #
  def compress(list)

    # dispatch compression tasks
    results = { }
    todo = list.size
    TaskMaster.init
    list.each { |item|
      task = CompressionTask.new(item)
      # The block parameter is deliberately not called "task": on old
      # Rubies a block parameter with the name of an outer local
      # overwrites that local.
      TaskMaster.enqueue(task) { |res, finished, src|
        # XXX put in resv chr
        results[finished.idTotal] = res
        todo -= 1
      }
    }

    # harvest results
    while todo != 0
      TaskMaster.waitForReply
    end

    # add vice versa compression if symmetric
    if @config.isSymmetric?
      list.each { |item|
        if item.size > 1
          abName = item.join()
          baName = item.reverse().join()
          results[baName] = results[abName]
        end
      }
    end

    results
  end

  #
  # build singles and pairs to compress from the
  # given list
  #
  # returns: a list holding, for every item, [item] followed by its
  #          pairs with every element of list
  #
  # NOTE(review): the inner loop already visits every ordered pair, so
  # the non-symmetric branch emits each ordered pair twice and the
  # symmetric branch still emits both orders.  Left unchanged because
  # callers may depend on the exact list; worth revisiting.
  #
  def buildPairs(list)
    cList = Array.new()
    symmetric = @config.isSymmetric?
    list.each { |item|
      cList.push([item])
      list.each { |item2|
        cList.push([item, item2])
        cList.push([item2, item]) unless symmetric
      }
    }
    cList
  end

  #
  # build singles and pairs with the given object
  #
  # returns: for every item of list: [item], [a,item] and, unless the
  #          configuration is symmetric, [item,a]
  #
  def buildSinglePairs(a, list)
    cList = Array.new()
    symmetric = @config.isSymmetric?
    list.each { |item|
      cList.push([item])
      cList.push([a, item])
      cList.push([item, a]) unless symmetric
    }
    cList
  end

  ### single pair ncd

  #
  # calculate ncd from two cObjects
  #
  def ncdSingle(a, b)
    if @config.isSymmetric?
      c = compress([[a], [b], [a, b]])
      c["#{b}#{a}"] = c["#{a}#{b}"]
    else
      c = compress([[a], [b], [a, b], [b, a]])
    end
    ncd(c["#{a}"], c["#{b}"], c["#{a}#{b}"], c["#{b}#{a}"])
  end

  #
  # calculate ncd from 2 strings
  #
  # Each string is used as its own id.
  #
  def ncdString(a, b)
    aObj = CompressionObject.fromString(a, a)
    bObj = CompressionObject.fromString(b, b)
    ncdSingle(aObj, bObj)
  end

  #
  # calculate ncd from 2 files
  #
  def ncdFile(a, b)
    aObj = CompressionObject.fromFile(a)
    bObj = CompressionObject.fromFile(b)
    ncdSingle(aObj, bObj)
  end

  ### vector ncd

  #
  # calculate the ncd between one object and a list
  # of others
  #
  # returns: an Array with one ncd per element of list, in list order
  #
  def ncdVector(a, list)

    # precompress everything necessary
    cList = buildSinglePairs(a, list)
    cList.push([a])
    c = compress(cList)

    # calculate ncd of a with everything else
    list.collect { |b|
      ncd(c["#{a}"], c["#{b}"], c["#{a}#{b}"], c["#{b}#{a}"])
    }
  end

  #
  # calculate the mutual ncd of a string with a list of strings
  #
  # Each string is used as its own id, as in ncdString;
  # CompressionObject.fromString needs both the string and an id.
  #
  def ncdVectorString(a, list)
    aObj = CompressionObject.fromString(a, a)
    listObj = list.collect { |item|
      CompressionObject.fromString(item, item)
    }
    ncdVector(aObj, listObj)
  end

  #
  # calculate the mutual ncd of a file with a list of files
  #
  def ncdVectorFile(a, list)
    aObj = CompressionObject.fromFile(a)
    listObj = list.collect { |item|
      CompressionObject.fromFile(item)
    }
    ncdVector(aObj, listObj)
  end


  ### matrix ncd

  #
  # calculate the mutual ncd of all lists objects
  #
  # returns: an Array of rows; row i, column j holds the ncd of
  #          list[i] and list[j]
  #
  def ncdMatrix(list)
    # precompress everything necessary
    cList = buildPairs(list)
    c = compress(cList)

    # calculate ncd of every object with every object
    list.collect { |a|
      list.collect { |b|
        ncd(c["#{a}"], c["#{b}"], c["#{a}#{b}"], c["#{b}#{a}"])
      }
    }
  end

  #
  # calculate the mutual ncd of a list of strings
  #
  # Each string is used as its own id, as in ncdString.
  #
  def ncdMatrixString(list) # most caching benefit
    listObj = list.collect { |item|
      CompressionObject.fromString(item, item)
    }
    ncdMatrix(listObj)
  end

  #
  # calculate the mutual ncd of a list of files
  #
  def ncdMatrixFile(list)
    listObj = list.collect { |item|
      CompressionObject.fromFile(item)
    }
    ncdMatrix(listObj)
  end
end
247
+
248
+