complearn 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (104)
  1. data/AUTHORS +13 -0
  2. data/COPYING +340 -0
  3. data/ChangeLog +0 -0
  4. data/INSTALL +231 -0
  5. data/Makefile +352 -0
  6. data/Makefile.am +76 -0
  7. data/Makefile.in +352 -0
  8. data/NEWS +7 -0
  9. data/README +0 -0
  10. data/aclocal.m4 +104 -0
  11. data/bin/Makefile +209 -0
  12. data/bin/Makefile.am +8 -0
  13. data/bin/Makefile.in +209 -0
  14. data/bin/labeltree +68 -0
  15. data/bin/labeltree.in +68 -0
  16. data/bin/makesvm +70 -0
  17. data/bin/makesvm.in +70 -0
  18. data/bin/maketree +98 -0
  19. data/bin/maketree.in +98 -0
  20. data/bin/ncd +43 -0
  21. data/bin/ncd.in +43 -0
  22. data/bin/ncdmatrix +54 -0
  23. data/bin/ncdmatrix.in +54 -0
  24. data/bin/ncdvector +50 -0
  25. data/bin/ncdvector.in +50 -0
  26. data/complearn-0.6.2.gem +0 -0
  27. data/complearn.gemspec +57 -0
  28. data/config.log +597 -0
  29. data/config.status +1082 -0
  30. data/configure +4922 -0
  31. data/configure.ac +91 -0
  32. data/confstat5FpLBf/config.h +65 -0
  33. data/confstat5FpLBf/subs-1.sed +50 -0
  34. data/confstat5FpLBf/subs-2.sed +13 -0
  35. data/confstat5FpLBf/subs.frag +0 -0
  36. data/confstat5FpLBf/subs.sed +59 -0
  37. data/confstat5FpLBf/undefs.sed +24 -0
  38. data/doc/FAQ.txt +67 -0
  39. data/doc/Makefile +286 -0
  40. data/doc/Makefile.am +11 -0
  41. data/doc/Makefile.in +286 -0
  42. data/doc/devguide.txt +15 -0
  43. data/doc/example.complearnrc +14 -0
  44. data/doc/examples.txt +35 -0
  45. data/doc/man/Makefile +255 -0
  46. data/doc/man/Makefile.am +11 -0
  47. data/doc/man/Makefile.in +255 -0
  48. data/doc/man/complearn.5 +91 -0
  49. data/doc/man/labeltree.1 +35 -0
  50. data/doc/man/makesvm.1 +60 -0
  51. data/doc/man/maketree.1 +58 -0
  52. data/doc/man/ncd.1 +51 -0
  53. data/doc/man/ncdmatrix.1 +40 -0
  54. data/doc/man/ncdvector.1 +42 -0
  55. data/doc/readme.txt +101 -0
  56. data/doc/userguide.txt +46 -0
  57. data/examples/genes/blueWhale.txt +1 -0
  58. data/examples/genes/cat.txt +1 -0
  59. data/examples/genes/chimpanzee.txt +1 -0
  60. data/examples/genes/finWhale.txt +1 -0
  61. data/examples/genes/graySeal.txt +1 -0
  62. data/examples/genes/harborSeal.txt +1 -0
  63. data/examples/genes/horse.txt +1 -0
  64. data/examples/genes/human.txt +1 -0
  65. data/examples/genes/mouse.txt +1 -0
  66. data/examples/genes/rat.txt +1 -0
  67. data/ext/Makefile +167 -0
  68. data/ext/Quartet.c +399 -0
  69. data/ext/Quartet.h +62 -0
  70. data/ext/TreeScore.c +244 -0
  71. data/ext/TreeScore.h +3 -0
  72. data/ext/config.h +65 -0
  73. data/ext/config.h.in +64 -0
  74. data/ext/extconf.rb +3 -0
  75. data/ext/lib/CompLearnLib/CLConfig.rb +241 -0
  76. data/ext/lib/CompLearnLib/CompressionObject.rb +59 -0
  77. data/ext/lib/CompLearnLib/CompressionTask.rb +99 -0
  78. data/ext/lib/CompLearnLib/DistMatrix.rb +18 -0
  79. data/ext/lib/CompLearnLib/FoundComp.rb +10 -0
  80. data/ext/lib/CompLearnLib/FoundComp.rb.in +10 -0
  81. data/ext/lib/CompLearnLib/Ncd.rb +248 -0
  82. data/ext/lib/CompLearnLib/RunEnv.rb +150 -0
  83. data/ext/lib/CompLearnLib/Task.rb +39 -0
  84. data/ext/lib/CompLearnLib/TaskMaster.rb +13 -0
  85. data/ext/lib/CompLearnLib/TaskMasterMPI.rb +112 -0
  86. data/ext/lib/CompLearnLib/TaskMasterSingle.rb +39 -0
  87. data/ext/lib/CompLearnLib/Tree.rb +300 -0
  88. data/install-sh +294 -0
  89. data/missing +336 -0
  90. data/mkinstalldirs +111 -0
  91. data/o +24 -0
  92. data/scripts/CompLearn.iss +89 -0
  93. data/scripts/CompLearn.iss.in +89 -0
  94. data/scripts/debian/changelog +6 -0
  95. data/scripts/debian/control +14 -0
  96. data/scripts/makeSetup.sh +23 -0
  97. data/scripts/makeSetup.sh.in +23 -0
  98. data/scripts/makedeb.zsh +46 -0
  99. data/scripts/makedeb.zsh.in +46 -0
  100. data/tests/alltests.rb +2 -0
  101. data/tests/bz2test.rb +516 -0
  102. data/tests/sshagent-test.rb +48 -0
  103. data/tests/tests.rb +275 -0
  104. metadata +164 -0
@@ -0,0 +1,68 @@
#!/usr/bin/ruby1.8
#
# labeltree -- relabel the leaf vertices of a tree .dot file using filenames.
#
# Usage: labeltree tree.dot {dirname|filelist.txt}
#
# The file at position i of the file list supplies the label for the vertex
# currently named "i". The untouched input is renamed to <tree.dot>.orig and
# the relabelled text is written back under the original name.
#
# Exit status: 0 on success, 1 on a usage error or when relabelling fails.

require 'optparse'
require 'CompLearnLib/CLConfig'

# Derives a vertex label from a file path: the basename up to its first '.',
# with every non-alphanumeric character removed. A label that starts with a
# digit but also contains a letter gets a "zz" prefix.
# NOTE(review): presumably the prefix keeps the label a valid unquoted DOT
# identifier -- confirm against the DOT grammar.
def leaf_label(path)
  label = File.basename(path).gsub(/[.].*/, '').gsub(/[^a-zA-Z0-9]+/, '')
  label = "zz#{label}" if label =~ /^[0-9]/ && label =~ /[a-zA-Z]/
  label
end

# Returns a copy of +dot+ in which every "--" edge endpoint whose name is a
# key of +labels+ is replaced by the mapped label. Endpoints that are not keys
# (e.g. vertices beyond the file list) are left exactly as they were.
#
# Each side is rewritten in a single pass, so a label that itself consists
# only of digits can never be relabelled a second time. Regexp literals are
# used because "\s" inside a double-quoted string is a plain space, which made
# the original patterns miss tab-indented edges. [ \t] rather than \s keeps a
# match from spilling across line breaks.
def relabel_edges(dot, labels)
  left_done = dot.gsub(/^[ \t]*(\d+)[ \t]*--/) {
    labels.key?($1) ? "#{labels[$1]} --" : $&
  }
  left_done.gsub(/--[ \t]*(\d+)[ \t]*$/) {
    labels.key?($1) ? "-- #{labels[$1]}" : $&
  }
end

setup = Hash.new('')

ARGV.options { |opt|
  opt.banner = "Usage: labeltree tree.dot {dirname|filelist.txt}"
  opt.on("")
  opt.on("Relabels a tree's leaf vertices according to filenames")
  opt.on("If a directory is specified, all files in that directory are used")
  opt.on("If a file is specified, it must contain filenames one per line")
  opt.on("The original unlabelled tree.dot will be renamed tree.dot.orig")

  opt.on("Options:\n")
  # An explicit assignment replaces the 1.8-only `{ |setup['cfgfile']| }`
  # block-parameter form, which is a syntax error on Ruby 1.9 and later.
  opt.on("--configfile filename", "-f", String,
         "Read alternate configuration file") { |path| setup['cfgfile'] = path }
  opt.on("--help", "-h", "Print this help text and exit") { puts opt; exit 0 }
  opt.on("--version", "-v", "Print version and exit") { CLConfig.printVersionAndExit() }

  opt.parse!

  # Both positional arguments (tree file and file list/directory) are required.
  if ARGV.size < 2
    puts opt
    exit(1)
  end
}

CLConfig.setDefaultConfig(CLConfig.new(setup['cfgfile'])) if setup.has_key?('cfgfile')
cfg = CLConfig.getDefaultConfig

status = 0
begin
  treefile, objname = ARGV

  # presumably an Array of paths, in the order used to number the vertices
  files = cfg.getFilelistFromDirOrFile(objname)

  # Maps the current vertex name (list index as a string) to its new label.
  newnames = { }
  files.each_index { |i|
    fname = leaf_label(files[i])
    newnames[i.to_s] = fname
    puts "Relabelling #{i} to #{fname}"
  }

  # File.read closes the handle; the original left it open.
  relabelled = relabel_edges(File.read(treefile), newnames)

  File.rename(treefile, "#{treefile}.orig")
  File.open(treefile, 'w') { |f| f.write(relabelled) }
rescue
  puts "Exception: #{$!}"
  # Report the failure to the caller instead of exiting 0 as if it succeeded.
  status = 1
end
exit(status)
@@ -0,0 +1,68 @@
#!@RUBYBIN@
#
# labeltree -- relabel the leaf vertices of a tree .dot file using filenames.
# (@RUBYBIN@ above is a placeholder; presumably the build fills in the
# interpreter path -- confirm against configure.ac.)
#
# Usage: labeltree tree.dot {dirname|filelist.txt}
#
# The file at position i of the file list supplies the label for the vertex
# currently named "i". The untouched input is renamed to <tree.dot>.orig and
# the relabelled text is written back under the original name.
#
# Exit status: 0 on success, 1 on a usage error or when relabelling fails.

require 'optparse'
require 'CompLearnLib/CLConfig'

# Derives a vertex label from a file path: the basename up to its first '.',
# with every non-alphanumeric character removed. A label that starts with a
# digit but also contains a letter gets a "zz" prefix.
# NOTE(review): presumably the prefix keeps the label a valid unquoted DOT
# identifier -- confirm against the DOT grammar.
def leaf_label(path)
  label = File.basename(path).gsub(/[.].*/, '').gsub(/[^a-zA-Z0-9]+/, '')
  label = "zz#{label}" if label =~ /^[0-9]/ && label =~ /[a-zA-Z]/
  label
end

# Returns a copy of +dot+ in which every "--" edge endpoint whose name is a
# key of +labels+ is replaced by the mapped label. Endpoints that are not keys
# (e.g. vertices beyond the file list) are left exactly as they were.
#
# Each side is rewritten in a single pass, so a label that itself consists
# only of digits can never be relabelled a second time. Regexp literals are
# used because "\s" inside a double-quoted string is a plain space, which made
# the original patterns miss tab-indented edges. [ \t] rather than \s keeps a
# match from spilling across line breaks.
def relabel_edges(dot, labels)
  left_done = dot.gsub(/^[ \t]*(\d+)[ \t]*--/) {
    labels.key?($1) ? "#{labels[$1]} --" : $&
  }
  left_done.gsub(/--[ \t]*(\d+)[ \t]*$/) {
    labels.key?($1) ? "-- #{labels[$1]}" : $&
  }
end

setup = Hash.new('')

ARGV.options { |opt|
  opt.banner = "Usage: labeltree tree.dot {dirname|filelist.txt}"
  opt.on("")
  opt.on("Relabels a tree's leaf vertices according to filenames")
  opt.on("If a directory is specified, all files in that directory are used")
  opt.on("If a file is specified, it must contain filenames one per line")
  opt.on("The original unlabelled tree.dot will be renamed tree.dot.orig")

  opt.on("Options:\n")
  # An explicit assignment replaces the 1.8-only `{ |setup['cfgfile']| }`
  # block-parameter form, which is a syntax error on Ruby 1.9 and later.
  opt.on("--configfile filename", "-f", String,
         "Read alternate configuration file") { |path| setup['cfgfile'] = path }
  opt.on("--help", "-h", "Print this help text and exit") { puts opt; exit 0 }
  opt.on("--version", "-v", "Print version and exit") { CLConfig.printVersionAndExit() }

  opt.parse!

  # Both positional arguments (tree file and file list/directory) are required.
  if ARGV.size < 2
    puts opt
    exit(1)
  end
}

CLConfig.setDefaultConfig(CLConfig.new(setup['cfgfile'])) if setup.has_key?('cfgfile')
cfg = CLConfig.getDefaultConfig

status = 0
begin
  treefile, objname = ARGV

  # presumably an Array of paths, in the order used to number the vertices
  files = cfg.getFilelistFromDirOrFile(objname)

  # Maps the current vertex name (list index as a string) to its new label.
  newnames = { }
  files.each_index { |i|
    fname = leaf_label(files[i])
    newnames[i.to_s] = fname
    puts "Relabelling #{i} to #{fname}"
  }

  # File.read closes the handle; the original left it open.
  relabelled = relabel_edges(File.read(treefile), newnames)

  File.rename(treefile, "#{treefile}.orig")
  File.open(treefile, 'w') { |f| f.write(relabelled) }
rescue
  puts "Exception: #{$!}"
  # Report the failure to the caller instead of exiting 0 as if it succeeded.
  status = 1
end
exit(status)
@@ -0,0 +1,70 @@
#!/usr/bin/ruby1.8
#
# makesvm -- write SVM training and testing files built from NCD vectors.
#
# Usage: makesvm [options] filelist.txt
#
# filelist.txt holds one "<category> <type> <filename>" entry per line, where
# <type> is F (feature extractor), G (training) or P (testing). For every G
# entry a line is written to trainsvm.txt and for every P entry a line is
# written to testsvm.txt, both in the current directory. Each line has the
# form "<category> 1:<v1> 2:<v2> ... ", one value per feature file.
#
# Exit status: 0 on success, 1 on a usage error or when no F entry exists.

require 'optparse'

require 'CompLearnLib/Ncd.rb'

setup = Hash.new('')

TaskMaster.init()

ARGV.options { |opt|
  opt.banner = "Usage: makesvm [options] filelist.txt"
  opt.on("")
  opt.on("Creates SVM classification training files from NCD's of listed files")
  opt.on("filelist.txt must be a list of files, one per line")
  opt.on("Each line must consist of:")
  opt.on("<category> <type> <filename>")
  opt.on("where <category> is an integer specifying the category")
  opt.on("and <type> is either F, G, or P")
  opt.on("F means this file is to be used as a feature extractor")
  opt.on("G means this file is to be used for training (given)")
  opt.on("P means this file is to be used for testing (prediction)")
  opt.on("Options:\n")
  # An explicit assignment replaces the 1.8-only `{ |setup['cfgfile']| }`
  # block-parameter form, which is a syntax error on Ruby 1.9 and later.
  opt.on("--configfile filename", "-f", String,
         "Read alternate configuration file") { |path| setup['cfgfile'] = path }
  opt.on("--help", "-h", "Print this help text and exit") { puts opt; exit 0 }
  opt.on("--version", "-v", "Print version and exit") { CLConfig.printVersionAndExit() }

  opt.parse!

  # The tagged file list is the only required positional argument.
  if ARGV.size < 1
    puts opt
    exit(1)
  end
}

CLConfig.setDefaultConfig(CLConfig.new(setup['cfgfile'])) if setup.has_key?('cfgfile')
cfg = CLConfig.getDefaultConfig

filelist = ARGV[0]

# presumably three lists split by type code (G, F, P) -- the two written out
# below are iterated as [tag, filename] pairs
training, features, testing = cfg.readTaggedFileList(filelist)

# Without feature files every vector would be empty, so stop early.
unless features.size > 0
  puts "You must have at least one feature specified (with an F type code)"
  exit(1)
end

n = Ncd.new()

[['trainsvm.txt', training], ['testsvm.txt', testing]].each { |outname, vecs|
  # The block form closes the file even if computing a vector raises.
  File.open(outname, "w") { |f|
    vecs.each { |tag, fname|
      f.write "#{tag} "
      fv = n.ncdVectorFile(fname, features)
      # Feature indices in the output are 1-based.
      fv.each_index { |i|
        f.write "#{i+1}:#{fv[i]} "
      }
      f.puts
    }
  }
}

exit(0)
@@ -0,0 +1,70 @@
#!@RUBYBIN@
#
# makesvm -- write SVM training and testing files built from NCD vectors.
# (@RUBYBIN@ above is a placeholder; presumably the build fills in the
# interpreter path -- confirm against configure.ac.)
#
# Usage: makesvm [options] filelist.txt
#
# filelist.txt holds one "<category> <type> <filename>" entry per line, where
# <type> is F (feature extractor), G (training) or P (testing). For every G
# entry a line is written to trainsvm.txt and for every P entry a line is
# written to testsvm.txt, both in the current directory. Each line has the
# form "<category> 1:<v1> 2:<v2> ... ", one value per feature file.
#
# Exit status: 0 on success, 1 on a usage error or when no F entry exists.

require 'optparse'

require 'CompLearnLib/Ncd.rb'

setup = Hash.new('')

TaskMaster.init()

ARGV.options { |opt|
  opt.banner = "Usage: makesvm [options] filelist.txt"
  opt.on("")
  opt.on("Creates SVM classification training files from NCD's of listed files")
  opt.on("filelist.txt must be a list of files, one per line")
  opt.on("Each line must consist of:")
  opt.on("<category> <type> <filename>")
  opt.on("where <category> is an integer specifying the category")
  opt.on("and <type> is either F, G, or P")
  opt.on("F means this file is to be used as a feature extractor")
  opt.on("G means this file is to be used for training (given)")
  opt.on("P means this file is to be used for testing (prediction)")
  opt.on("Options:\n")
  # An explicit assignment replaces the 1.8-only `{ |setup['cfgfile']| }`
  # block-parameter form, which is a syntax error on Ruby 1.9 and later.
  opt.on("--configfile filename", "-f", String,
         "Read alternate configuration file") { |path| setup['cfgfile'] = path }
  opt.on("--help", "-h", "Print this help text and exit") { puts opt; exit 0 }
  opt.on("--version", "-v", "Print version and exit") { CLConfig.printVersionAndExit() }

  opt.parse!

  # The tagged file list is the only required positional argument.
  if ARGV.size < 1
    puts opt
    exit(1)
  end
}

CLConfig.setDefaultConfig(CLConfig.new(setup['cfgfile'])) if setup.has_key?('cfgfile')
cfg = CLConfig.getDefaultConfig

filelist = ARGV[0]

# presumably three lists split by type code (G, F, P) -- the two written out
# below are iterated as [tag, filename] pairs
training, features, testing = cfg.readTaggedFileList(filelist)

# Without feature files every vector would be empty, so stop early.
unless features.size > 0
  puts "You must have at least one feature specified (with an F type code)"
  exit(1)
end

n = Ncd.new()

[['trainsvm.txt', training], ['testsvm.txt', testing]].each { |outname, vecs|
  # The block form closes the file even if computing a vector raises.
  File.open(outname, "w") { |f|
    vecs.each { |tag, fname|
      f.write "#{tag} "
      fv = n.ncdVectorFile(fname, features)
      # Feature indices in the output are 1-based.
      fv.each_index { |i|
        f.write "#{i+1}:#{fv[i]} "
      }
      f.puts
    }
  }
}

exit(0)
@@ -0,0 +1,98 @@
#!/usr/bin/ruby1.8
#
# maketree -- search for a well-scoring tree for a distance matrix.
#
# Usage: maketree [options] distmat.txt treeout.dot
#
# Reads the distance matrix, starts from a random tree and repeatedly hands
# TreeTask jobs to TaskMaster, keeping whichever returned tree scores highest.
# The best tree found is written to treeout.dot with numeric vertex names
# ("0", "1", ...).
#
# Exit status: 0 on success, 1 on a usage error or when the search fails.

require 'optparse'
require 'CompLearn'
require 'CompLearnLib/TaskMaster'
require 'CompLearnLib/Tree'
require 'CompLearnLib/CLConfig'
require 'CompLearnLib/DistMatrix'

include MTree

setup = Hash.new('')

TaskMaster.init()

ARGV.options { |opt|
  opt.banner = "Usage: maketree [options] distmat.txt treeout.dot"
  opt.on("")
  opt.on("Computes a good tree to fit a distance matrix distmat.txt.")
  opt.on("distmat.txt must be a distance matrix file listing n*n")
  opt.on("positive real numbers. Each line must contain n numbers")
  opt.on("separated by spaces. The resultant tree will be written")
  opt.on("to treeout.dot. This file is suitable for further processing")
  opt.on("by labeltree, dot, or neato, for example.")
  opt.on("If relative filenames are used, inputDir and cwd will be searched\n")
  opt.on("Options:\n")
  # An explicit assignment replaces the 1.8-only `{ |setup['cfgfile']| }`
  # block-parameter form, which is a syntax error on Ruby 1.9 and later.
  opt.on("--configfile filename", "-f", String,
         "Read alternate configuration file") { |path| setup['cfgfile'] = path }
  opt.on("--help", "-h", "Print this help text and exit") { puts opt; exit 0 }
  opt.on("--version", "-v", "Print version and exit") { CLConfig.printVersionAndExit() }

  opt.parse!

  # Both the matrix file and the output file are required.
  if ARGV.size < 2
    puts opt
    exit(1)
  end
}

status = 0
begin
  CLConfig.setDefaultConfig(CLConfig.new(setup['cfgfile'])) if setup.has_key?('cfgfile')
  cfg = CLConfig.getDefaultConfig
  distmat, treename = ARGV
  dm = DistMatrix.readFromFile(distmat)
  speciescount = dm.size
  best = Tree.randomTree(speciescount)

  # NOTE(review): TaskMaster.init() already ran before option parsing; this
  # second call is kept as in the original -- confirm whether it is needed.
  TaskMaster.init
  TaskMaster.storeEverywhere('dm', dm)

  # The configuration decides which TreeScore list is built from the matrix.
  ts = if cfg.isUseBestThirdOnly?()
         TreeScore.makeBestList(dm)
       else
         TreeScore.makeFullList(dm)
       end
  ts.penalty = cfg.unpairedPenalty()
  maxfailedtries = cfg.maxFailedTries()
  treespertry = cfg.treesPerTry()
  bestscore = ts.score(best)

  puts "The penalty term is #{ts.penalty}"
  puts "Score is first #{bestscore}"

  enqc = 0            # tasks enqueued whose result block has not run yet
  todo = treespertry  # tasks still to be enqueued
  failcount = 0       # consecutive results that did not beat the best score
  while todo > 0 && failcount < maxfailedtries
    # NOTE(review): the meaning of the literal 100 is defined by TreeTask,
    # which is not visible here.
    t = TreeTask.new(best, 100, cfg.unpairedPenalty())
    enqc += 1
    # The unused block parameters get their own names: on Ruby 1.8 a block
    # parameter called `t` would overwrite the outer `t`.
    TaskMaster.enqueue(t) { |res, _task, _src|
      newbest, newbestscore = res
      if newbestscore > bestscore
        bestscore = newbestscore
        best = newbest
        failcount = 0
        puts "New best score found: #{bestscore}"
      else
        failcount += 1
      end
      enqc -= 1
    }
    todo -= 1
  end

  # Wait until every enqueued task has reported back.
  while enqc > 0
    TaskMaster.waitForSlave()
  end

  bestscore = ts.score(best)
  puts "Best tree score is #{bestscore}"

  names = [ ]
  speciescount.times { |i| names << i.to_s }
  # The block form flushes and closes the output; the original never closed it.
  File.open(treename, 'w') { |f|
    f.write(best.toDotString(names, 'tree', "Score: #{bestscore}"))
  }
rescue
  puts "Exception: #{$!}"
  # Report the failure to the caller instead of exiting 0 as if it succeeded.
  status = 1
end
exit(status)
@@ -0,0 +1,98 @@
#!@RUBYBIN@
#
# maketree -- search for a well-scoring tree for a distance matrix.
# (@RUBYBIN@ above is a placeholder; presumably the build fills in the
# interpreter path -- confirm against configure.ac.)
#
# Usage: maketree [options] distmat.txt treeout.dot
#
# Reads the distance matrix, starts from a random tree and repeatedly hands
# TreeTask jobs to TaskMaster, keeping whichever returned tree scores highest.
# The best tree found is written to treeout.dot with numeric vertex names
# ("0", "1", ...).
#
# Exit status: 0 on success, 1 on a usage error or when the search fails.

require 'optparse'
require 'CompLearn'
require 'CompLearnLib/TaskMaster'
require 'CompLearnLib/Tree'
require 'CompLearnLib/CLConfig'
require 'CompLearnLib/DistMatrix'

include MTree

setup = Hash.new('')

TaskMaster.init()

ARGV.options { |opt|
  opt.banner = "Usage: maketree [options] distmat.txt treeout.dot"
  opt.on("")
  opt.on("Computes a good tree to fit a distance matrix distmat.txt.")
  opt.on("distmat.txt must be a distance matrix file listing n*n")
  opt.on("positive real numbers. Each line must contain n numbers")
  opt.on("separated by spaces. The resultant tree will be written")
  opt.on("to treeout.dot. This file is suitable for further processing")
  opt.on("by labeltree, dot, or neato, for example.")
  opt.on("If relative filenames are used, inputDir and cwd will be searched\n")
  opt.on("Options:\n")
  # An explicit assignment replaces the 1.8-only `{ |setup['cfgfile']| }`
  # block-parameter form, which is a syntax error on Ruby 1.9 and later.
  opt.on("--configfile filename", "-f", String,
         "Read alternate configuration file") { |path| setup['cfgfile'] = path }
  opt.on("--help", "-h", "Print this help text and exit") { puts opt; exit 0 }
  opt.on("--version", "-v", "Print version and exit") { CLConfig.printVersionAndExit() }

  opt.parse!

  # Both the matrix file and the output file are required.
  if ARGV.size < 2
    puts opt
    exit(1)
  end
}

status = 0
begin
  CLConfig.setDefaultConfig(CLConfig.new(setup['cfgfile'])) if setup.has_key?('cfgfile')
  cfg = CLConfig.getDefaultConfig
  distmat, treename = ARGV
  dm = DistMatrix.readFromFile(distmat)
  speciescount = dm.size
  best = Tree.randomTree(speciescount)

  # NOTE(review): TaskMaster.init() already ran before option parsing; this
  # second call is kept as in the original -- confirm whether it is needed.
  TaskMaster.init
  TaskMaster.storeEverywhere('dm', dm)

  # The configuration decides which TreeScore list is built from the matrix.
  ts = if cfg.isUseBestThirdOnly?()
         TreeScore.makeBestList(dm)
       else
         TreeScore.makeFullList(dm)
       end
  ts.penalty = cfg.unpairedPenalty()
  maxfailedtries = cfg.maxFailedTries()
  treespertry = cfg.treesPerTry()
  bestscore = ts.score(best)

  puts "The penalty term is #{ts.penalty}"
  puts "Score is first #{bestscore}"

  enqc = 0            # tasks enqueued whose result block has not run yet
  todo = treespertry  # tasks still to be enqueued
  failcount = 0       # consecutive results that did not beat the best score
  while todo > 0 && failcount < maxfailedtries
    # NOTE(review): the meaning of the literal 100 is defined by TreeTask,
    # which is not visible here.
    t = TreeTask.new(best, 100, cfg.unpairedPenalty())
    enqc += 1
    # The unused block parameters get their own names: on Ruby 1.8 a block
    # parameter called `t` would overwrite the outer `t`.
    TaskMaster.enqueue(t) { |res, _task, _src|
      newbest, newbestscore = res
      if newbestscore > bestscore
        bestscore = newbestscore
        best = newbest
        failcount = 0
        puts "New best score found: #{bestscore}"
      else
        failcount += 1
      end
      enqc -= 1
    }
    todo -= 1
  end

  # Wait until every enqueued task has reported back.
  while enqc > 0
    TaskMaster.waitForSlave()
  end

  bestscore = ts.score(best)
  puts "Best tree score is #{bestscore}"

  names = [ ]
  speciescount.times { |i| names << i.to_s }
  # The block form flushes and closes the output; the original never closed it.
  File.open(treename, 'w') { |f|
    f.write(best.toDotString(names, 'tree', "Score: #{bestscore}"))
  }
rescue
  puts "Exception: #{$!}"
  # Report the failure to the caller instead of exiting 0 as if it succeeded.
  status = 1
end
exit(status)