rbbt 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/LICENSE +20 -0
  2. data/README.rdoc +17 -0
  3. data/bin/rbbt_config +180 -0
  4. data/install_scripts/classifier/R/classify.R +36 -0
  5. data/install_scripts/classifier/Rakefile +140 -0
  6. data/install_scripts/get_abner.sh +2 -0
  7. data/install_scripts/get_banner.sh +25 -0
  8. data/install_scripts/get_biocreative.sh +72 -0
  9. data/install_scripts/get_crf++.sh +26 -0
  10. data/install_scripts/get_entrez.sh +4 -0
  11. data/install_scripts/get_go.sh +4 -0
  12. data/install_scripts/get_polysearch.sh +8 -0
  13. data/install_scripts/ner/Rakefile +206 -0
  14. data/install_scripts/ner/config/default.rb +52 -0
  15. data/install_scripts/norm/Rakefile +218 -0
  16. data/install_scripts/norm/config/cue_default.rb +10 -0
  17. data/install_scripts/norm/config/tokens_default.rb +79 -0
  18. data/install_scripts/norm/functions.sh +21 -0
  19. data/install_scripts/organisms/Rakefile +25 -0
  20. data/install_scripts/organisms/cgd.Rakefile +84 -0
  21. data/install_scripts/organisms/human.Rakefile +145 -0
  22. data/install_scripts/organisms/mgi.Rakefile +77 -0
  23. data/install_scripts/organisms/pombe.Rakefile +40 -0
  24. data/install_scripts/organisms/rake-include.rb +258 -0
  25. data/install_scripts/organisms/rgd.Rakefile +88 -0
  26. data/install_scripts/organisms/sgd.Rakefile +66 -0
  27. data/install_scripts/organisms/tair.Rakefile +54 -0
  28. data/install_scripts/organisms/worm.Rakefile +109 -0
  29. data/install_scripts/stopwords +1 -0
  30. data/install_scripts/wordlists/consonants +897 -0
  31. data/install_scripts/wordlists/stopwords +1 -0
  32. data/lib/rbbt/bow/bow.rb +87 -0
  33. data/lib/rbbt/bow/classifier.rb +118 -0
  34. data/lib/rbbt/bow/dictionary.rb +218 -0
  35. data/lib/rbbt/ner/abner.rb +34 -0
  36. data/lib/rbbt/ner/banner.rb +73 -0
  37. data/lib/rbbt/ner/regexpNER.rb +62 -0
  38. data/lib/rbbt/ner/rner.rb +227 -0
  39. data/lib/rbbt/ner/rnorm/cue_index.rb +80 -0
  40. data/lib/rbbt/ner/rnorm/tokens.rb +213 -0
  41. data/lib/rbbt/ner/rnorm.rb +142 -0
  42. data/lib/rbbt/sources/biocreative.rb +75 -0
  43. data/lib/rbbt/sources/biomart.rb +106 -0
  44. data/lib/rbbt/sources/entrez.rb +211 -0
  45. data/lib/rbbt/sources/go.rb +40 -0
  46. data/lib/rbbt/sources/organism.rb +197 -0
  47. data/lib/rbbt/sources/polysearch.rb +88 -0
  48. data/lib/rbbt/sources/pubmed.rb +111 -0
  49. data/lib/rbbt/util/arrayHash.rb +255 -0
  50. data/lib/rbbt/util/filecache.rb +72 -0
  51. data/lib/rbbt/util/index.rb +69 -0
  52. data/lib/rbbt/util/misc.rb +101 -0
  53. data/lib/rbbt/util/open.rb +207 -0
  54. data/lib/rbbt/util/simpleDSL.rb +87 -0
  55. data/lib/rbbt/util/tmpfile.rb +19 -0
  56. data/lib/rbbt/version.rb +10 -0
  57. data/lib/rbbt.rb +86 -0
  58. data/tasks/install.rake +123 -0
  59. metadata +114 -0
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Miguel Vazquez
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,17 @@
1
+ = rbbt
2
+
3
+ Description goes here.
4
+
5
+ == Note on Patches/Pull Requests
6
+
7
+ * Fork the project.
8
+ * Make your feature addition or bug fix.
9
+ * Add tests for it. This is important so I don't break it in a
10
+ future version unintentionally.
11
+ * Commit, do not mess with rakefile, version, or history.
12
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
13
+ * Send me a pull request. Bonus points for topic branches.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2009 Miguel Vazquez. See LICENSE for details.
data/bin/rbbt_config ADDED
@@ -0,0 +1,180 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'rubygems'
4
+ require 'rake'
5
+
6
+
7
+ require 'simpleconsole'
8
+
9
+ begin
10
+ require 'rbbt'
11
+ rescue Rbbt::NoConfig
12
+ $noconfig = true
13
+ end
14
+
15
+ # Usage text printed by View#usage. Sub-action names must match the strings tested in Controller.
16
+ $USAGE =<<EOT
17
+ #{__FILE__} <action> [<subaction>] [--force] [--organism <org>]
18
+ actions:
19
+ * configure: Set paths for data, cache, and tmp directories
20
+
21
+ * install:
22
+ * basic: Third party software
23
+ * databases: Entrez and Biocreative
24
+ * models: Gene Mention and Classification
25
+ * organisms: Rules to gather data for organisms
26
+ * all: 3party wordlists entrez biocreative go ner norm classifier organisms polysearch
27
+
28
+ * update:
29
+ * organisms: Gather data for organisms
30
+ * ner: Build Named Entity Recognition Models for Gene Mention
31
+ * classifier:
32
+ Build Function/Process Classifiers
33
+
34
+ * purge_cache: Clean the non-persistent cache, which holds general things
35
+ downloaded using Open.read, like organism identifiers downloaded from
36
+ BioMart. The persistent cache, which hold pubmed articles or entrez gene
37
+ descriptions, is not cleaned, as these are not likely to change
38
+
39
+
40
+ EOT
41
+
42
+ class Controller < SimpleConsole::Controller
43
+ # Console controller: prepares @tasks / @location and the $force / $org globals for the matching View action.
44
+ params :bool => {:f => :force},
45
+ :string => {:o => :organism}
46
+ # No action given: render the usage text.
47
+ def default
48
+ render :action => :usage
49
+ end
50
+ # Explicit help request: render the usage text.
51
+ def help
52
+ render :action => :usage
53
+ end
54
+ # update <id>: pick the data subdirectory whose Rakefile View#update loads.
55
+ def update
56
+ raise "Run #{__FILE__} configure first to configure rbbt" if $noconfig
57
+ # Any id other than organisms, ner or classifier is redirected to the help action.
58
+ case params[:id]
59
+ when "organisms"
60
+ @location = File.join(Rbbt.datadir,'organisms')
61
+ when "ner"
62
+ @location = File.join(Rbbt.datadir,'ner')
63
+ when "classifier"
64
+ @location = File.join(Rbbt.datadir,'classifier')
65
+ else
66
+ redirect_to :action => :help, :id => :update
67
+ end
68
+ # Expose --force / --organism as globals (the classifier Rakefile's 'update' task reads $force and $org).
69
+ $force = true if params[:force]
70
+ $org = params[:organism] if params[:organism]
71
+
72
+ end
73
+ # install <id>: translate a group name into the list of rake task names View#install invokes.
74
+ def install
75
+ raise "Run #{__FILE__} configure first to configure rbbt" if $noconfig
76
+ # An id that is not one of the group names is used as a single task name; no id redirects to help.
77
+ case params[:id]
78
+ when "basic"
79
+ @tasks = %w(3party wordlists polysearch)
80
+ when "databases"
81
+ @tasks = %w(entrez biocreative go)
82
+ when "models"
83
+ @tasks = %w(ner norm classifier)
84
+ when "organisms"
85
+ @tasks = %w(organisms)
86
+ when "all"
87
+ @tasks = %w(3party wordlists entrez biocreative go ner norm classifier organisms polysearch)
88
+ when nil
89
+ redirect_to :action => :help, :id => :install
90
+ else
91
+ @tasks = [params[:id]]
92
+ end
93
+ # Same globals as in update.
94
+ $force = true if params[:force]
95
+ $org = params[:organism] if params[:organism]
96
+
97
+ end
98
+ # Nothing to prepare here; View#configure does the prompting and writing.
99
+ def configure
100
+ end
101
+ # Nothing to prepare here; View#purge_cache deletes the files.
102
+ def purge_cache
103
+ end
104
+
105
+ end
106
+
107
+ class View < SimpleConsole::View
108
+ def usage
109
+ puts $USAGE
110
+ end
111
+
112
+ def install
113
+ load File.join(Rbbt.rootdir, 'tasks/install.rake')
114
+
115
+ @tasks.each{|t|
116
+ puts "Invoking #{ t }"
117
+ Rake::Task[t].invoke
118
+ }
119
+ end
120
+
121
+ def update
122
+
123
+ puts "Changing directory to #{@location}"
124
+ chdir @location
125
+
126
+ load "./Rakefile"
127
+
128
+ Rake::Task['default'].invoke
129
+ end
130
+
131
+
132
+ def configure
133
+
134
+ defaultdir = File.join(ENV['HOME'],'rbbt')
135
+
136
+ cachedir = File.join(defaultdir, 'cache')
137
+ tmpdir = File.join(defaultdir, 'tmp')
138
+ datadir = File.join(defaultdir, 'data')
139
+
140
+ puts "Please indicate where you wish to place the data directories"
141
+ puts
142
+
143
+ puts
144
+ puts "* Cache Directory: This directory will hold downloads, from PubMed,
145
+ Entrez and other, for local store. It might grow considerably."
146
+ print "[#{ cachedir }]? "
147
+ input = STDIN.gets
148
+ cachedir = input if input =~ /\w/
149
+
150
+ puts
151
+ puts "* Tmp Directory: Temporary files."
152
+ print "[#{ tmpdir }]? "
153
+ input = STDIN.gets
154
+ tmpdir = input if input =~ /\w/
155
+
156
+ puts
157
+ puts "* Data Directory: Holds data from organisms, databases, third party software, etc."
158
+ print "[#{ datadir }]? "
159
+ input = STDIN.gets
160
+ datadir = input if input =~ /\w/
161
+
162
+
163
+
164
+ fout = File.open(File.join(ENV['HOME'], '.rbbt'),'w')
165
+ fout.puts "cachedir: #{cachedir}"
166
+ fout.puts "tmpdir: #{tmpdir}"
167
+ fout.puts "datadir: #{datadir}"
168
+ fout.close
169
+
170
+ end
171
+
172
+ def purge_cache
173
+ FileUtils.rm Dir.glob(File.join(Rbbt.cachedir,'open-remote','*'))
174
+ end
175
+
176
+ end
177
+
178
+ SimpleConsole::Application.run(ARGV, Controller, View)
179
+
180
+
@@ -0,0 +1,36 @@
1
+ library('e1071')
2
+ # Normalizes a table of term counts: log-scales it, rescales each row to sum to 100, then optionally weights the columns.
3
+ BOW.norm <- function(x, weights = NULL){
4
+ x = 1 + log(x);
5
+ x[x==-Inf] = 0;  # log(0) is -Inf, so zero counts stay 0
6
+ x.sum = as.matrix(x) %*% matrix(1,nrow=dim(x)[2],ncol=1);  # row sums, as a one-column matrix
7
+ x.sum = matrix(100/x.sum,nrow=length(x.sum),ncol=dim(x)[2]);  # each row's factor 100/sum repeated in every column
8
+ x.norm = x * x.sum;
9
+ rm(x.sum);
10
+ x.norm[is.na(x.norm)] = 0  # e.g. rows whose sum was 0
11
+ # Optional weights, one per column; only their absolute value is used.
12
+ if (!is.null(weights)){
13
+ x.norm = x.norm * matrix(abs(weights),ncol=length(weights),nrow=dim(x.norm)[1],byrow=T)
14
+ }
15
+
16
+ x.norm;
17
+ }
18
+
19
+ # Reads a tab-separated feature table, normalizes it, trains an SVM on the Class column and saves model and weights to modelfile.
20
+ BOW.classification.model <- function(features, modelfile, dictfile = NULL){
21
+ feats = read.table(features, sep="\t", header=T, row.names=1);  # first file column becomes the row names
22
+ # Weights are the second column of dictfile when one is given, otherwise NULL.
23
+ if (!is.null(dictfile)){
24
+ svm.weights = read.table(file=dictfile, sep="\t")[2];
25
+ }else {
26
+ svm.weights = NULL;
27
+ }
28
+ feats[-1] = BOW.norm(feats[-1], svm.weights);  # every column but the first (presumably Class) is normalized
29
+ svm.model = svm(Class ~ ., data=feats, svm.weights);  # NOTE(review): svm.weights is an unnamed third argument - confirm what svm() does with it
30
+ save(svm.model,svm.weights, file=modelfile);
31
+ }
32
+ # Normalizes x with BOW.norm and returns the result of predict() on it.
33
+ BOW.classification.classify <- function(modelfile, x, weights = NULL){
34
+ x = BOW.norm(x, weights);
35
+ predict(modelfile, x);  # NOTE(review): modelfile goes to predict() as-is, so callers presumably pass a loaded model object - confirm
36
+ }
@@ -0,0 +1,140 @@
1
+ require 'rbbt'
2
+ require 'rbbt/sources/organism'
3
+ require 'rbbt/sources/pubmed'
4
+ require 'rbbt/bow/bow'
5
+ require 'rbbt/bow/dictionary'
6
+ require 'rbbt/bow/classifier'
7
+ require 'rbbt/util/misc'
8
+
9
+ require 'progress-monitor'
10
+ require 'rand'
11
+
12
+ $hi = ENV['hi'] || 0.8  # passed (via to_f) as :hi to dict.weights
13
+ $low = ENV['low'] || 0.01  # passed (via to_f) as :low to dict.weights
14
+ $max = ENV['max'] || 3000  # passed (via to_i) as :limit to dict.weights
15
+ $bigrams = ENV['bigrams'] == 'true' || false  # second argument of BagOfWords.terms
16
+ # Upper bound on the number of articles sampled per class by the data rule.
17
+ $ndocs = ENV['ndocs'] || 5000
18
+
19
+ desc "Bilds Dictionary and Features for an organism"
20
+ rule(/data\/(.*)/) do |t|
21
+ org = File.basename(t.name)
22
+
23
+ go = Organism.gene_literature_go(org).collect{|gene, pmids| pmids}.flatten.uniq
24
+ all = Organism.literature(org).flatten.uniq - go
25
+
26
+ ndocs = [go.length, all.length, $ndocs.to_i].min
27
+ puts "Using #{ ndocs } from each class\n\n"
28
+
29
+ go = go.shuffle[0..ndocs - 1]
30
+ all = all.shuffle[0..ndocs - 1]
31
+
32
+ dict = Dictionary::KL.new
33
+
34
+
35
+
36
+ chunks = all.chunk(50)
37
+ Progress.monitor("Building Dictionary for #{ org }: -",1000)
38
+ chunks.each{|chunk|
39
+ PubMed.get_article(chunk).each{|pmid, article|
40
+ words = BagOfWords.terms(article.text,$bigrams)
41
+ dict.add(words, :-)
42
+ }
43
+ }
44
+
45
+ chunks = go.chunk(50)
46
+ Progress.monitor("Building Dictionary for #{ org }: +",1000)
47
+ chunks.each{|chunk|
48
+ PubMed.get_article(chunk).each{|pmid, article|
49
+ words = BagOfWords.terms(article.text,$bigrams)
50
+ dict.add(words, :+)
51
+ }
52
+ }
53
+
54
+ term_weigths = dict.weights(:low => $low.to_f, :hi => $hi.to_f, :limit => $max.to_i)
55
+ Open.write(t.name + '.dict', term_weigths.sort.collect{|p| p.join("\t")}.join("\n"))
56
+
57
+ terms = term_weigths.keys.sort
58
+
59
+ fout = File.open(t.name, 'w')
60
+ fout.puts((['Name','Class'] + terms).join("\t"))
61
+
62
+ Progress.monitor("Building Features for #{ org }", 1000)
63
+ all.each{|pmid|
64
+ text = PubMed.get_article(pmid).text
65
+ fout.puts(([pmid, :-] + BagOfWords.features(text, terms)).join("\t"))
66
+ }
67
+ go.each{|pmid|
68
+ text = PubMed.get_article(pmid).text
69
+ fout.puts(([pmid, :+] + BagOfWords.features(text, terms)).join("\t"))
70
+ }
71
+
72
+
73
+ fout.close
74
+ end
75
+
76
+ rule (/model\/(.*)/) => lambda{|n| n.sub(/model/,'data')} do |t|
77
+ features = t.name.sub(/model/,'data')
78
+ Classifier.create_model(features, t.name, features + '.dict')
79
+ end
80
+
81
+ rule (/results\/(.*)/) => lambda{|n| n.sub(/results/,'model')} do |t|
82
+ model = t.name.sub(/results/,'model')
83
+ features = t.name.sub(/results/,'data')
84
+ org = File.basename(t.name)
85
+
86
+ ndocs = 100
87
+
88
+ used = Open.read(features).collect{|l| l.chomp.split(/\t/).first}[1..-1]
89
+
90
+ classifier = Classifier.new(model)
91
+ go = Organism.gene_literature_go(org).collect{|gene, pmids| pmids}.flatten.uniq - used
92
+ all = Organism.literature(org).flatten.uniq - go - used
93
+
94
+ go = go.shuffle[0..ndocs - 1]
95
+ all = all.shuffle[0..ndocs - 1]
96
+
97
+ ndocs = go.length + all.length
98
+
99
+ raise "Not enogh unused articles to evaluate" if go.empty? || all.empty?
100
+
101
+ features_go = PubMed.get_article(go).collect{|pmid, article|
102
+ article = article.text
103
+ }
104
+ pos = classifier.classify(features_go).select{|v| v == '+'}.length
105
+
106
+ features_all = PubMed.get_article(all).collect{|pmid, article|
107
+ article = article.text
108
+ }
109
+ neg = classifier.classify(features_all).select{|v| v == '-'}.length
110
+
111
+ puts "#{ pos } #{ neg }"
112
+
113
+ precision = (pos + neg) / (ndocs).to_f
114
+ recall = pos / go.length.to_f
115
+ f1 = ( 2 * precision * recall) / (precision + recall ).to_f
116
+
117
+ puts "Precision: #{ precision}, Recall: #{ recall }, F1: #{f1}"
118
+ end
119
+
120
+ task 'clean' do
121
+ FileUtils.rm Dir.glob("data/*")
122
+ FileUtils.rm Dir.glob("model/*")
123
+ FileUtils.rm Dir.glob("results/*")
124
+
125
+ end
126
+ task 'all' do
127
+ Organism.all.each{|org|
128
+ Rake::Task["model/#{ org }"].invoke
129
+ }
130
+ end
131
+ task 'update' do
132
+ if $org
133
+ FileUtils.rm Dir.glob("**/#{$org}.*") if $force
134
+ Rake::Task["model/#{$org}"].invoke
135
+ else
136
+ Rake::Task['clean'].invoke if $force
137
+ Rake::Task['all'].invoke
138
+ end
139
+ end
140
+
@@ -0,0 +1,2 @@
1
+ #!/bin/bash
2
+ wget http://pages.cs.wisc.edu/~bsettles/abner/abner.jar
@@ -0,0 +1,25 @@
1
+ #!/bin/bash
2
+ # Downloads BANNER and its gene model, builds banner.jar, and leaves banner.jar, gene_model.bin and nlpdata/ here.
3
+ wget "http://downloads.sourceforge.net/banner/BANNER_v02.zip?modtime=1196955449&big_mirror=0" -O BANNER.zip
4
+ wget "http://downloads.sourceforge.net/banner/gene_model_v02.bin?modtime=1196955509&big_mirror=0" -O gene_model.bin
5
+ # -O fixes the local names: otherwise wget derives them from the URL (query string included, and
6
+ # before or after redirects depending on the wget version), so a rename of BANNER_v02.zip may not find the file.
7
+ unzip BANNER.zip
8
+ cd BANNER || exit 1  # stop here rather than run the later cd .. / rm steps in the wrong directory
9
+ libs=`find libs/ -name "*.jar"`
10
+ mkdir classes
11
+ javac -classpath `echo $libs|sed s/\ /:/g` -d classes `find src/ -name "*.java"`
12
+ cd classes || exit 1
13
+ for f in ../libs/*.jar; do jar xf "$f";done
14
+ jar cf banner.jar *
15
+ mv banner.jar ../..
16
+ cd ..
17
+ cp -R nlpdata/ ../
18
+ cd ..
19
+ rm BANNER.zip
20
+ rm -Rf BANNER
21
+
22
+
23
+
24
+
25
+
@@ -0,0 +1,72 @@
1
+ #!/bin/bash
2
+ # Downloads the BioCreative archives into src/ and unpacks them there.
3
+ mkdir src
4
+ cd src || exit 1  # never download into (or later rm -Rf src from) the wrong directory
5
+ wget "http://garr.dl.sourceforge.net/sourceforge/biocreative/bc2GNandGMgold_Subs.tar.gz"
6
+ wget "http://switch.dl.sourceforge.net/sourceforge/biocreative/biocreative1task1a.tar.gz"
7
+ wget "http://kent.dl.sourceforge.net/sourceforge/biocreative/biocreative1task1b.tar.gz"
8
+ wget "http://mesh.dl.sourceforge.net/sourceforge/biocreative/biocreative1task2.tar.gz"
9
+ wget "http://garr.dl.sourceforge.net/sourceforge/biocreative/bc2geneMention.tar.gz"
10
+ wget "http://switch.dl.sourceforge.net/sourceforge/biocreative/bc2normal.1.4.tar.gz"
11
+ wget "http://kent.dl.sourceforge.net/sourceforge/biocreative/bc2GNtest.zip"
12
+
13
+ for f in *.gz; do tar xfz "$f"; done
14
+ unzip bc2GNtest.zip
15
+
16
+ cd ..
17
+
18
+ mkdir BC2GM
19
+ cp -R src/bc2geneMention/train/ BC2GM/
20
+ cp -R src/sourceforgeDistrib-22-Sept-07/genemention/BC2GM/test/ BC2GM/
21
+ mv BC2GM/train/alt_eval.perl BC2GM/
22
+
23
+ mkdir BC2GN
24
+ cp -R src/biocreative2normalization/* BC2GN/
25
+ mv BC2GN/noisyTrainingData/ BC2GN/NoisyTrain
26
+ mv BC2GN/trainingData/ BC2GN/Train
27
+ cp -R src/bc2GNtest/bc2GNtestdocs/ BC2GN/Test
28
+ mv BC2GN/NoisyTrain/noisytrain.genelist BC2GN/NoisyTrain/genelist
29
+ mv BC2GN/Train/training.genelist BC2GN/Train/genelist
30
+ cp src/sourceforgeDistrib-22-Sept-07/genenormalization/bc2test.genelist BC2GN/Test/genelist
31
+
32
+ mkdir BC1GN
33
+ cp -R src/biocreative1/bc1task1b/* BC1GN/
34
+ mv BC1GN/fly/FlyDevTest/ BC1GN/fly/devtest
35
+ mv BC1GN/fly/FlyEvaluation/ BC1GN/fly/test
36
+ mv BC1GN/fly/FlyNoisyTraining/ BC1GN/fly/train
37
+ mv BC1GN/fly/*.list BC1GN/fly/synonyms.list
38
+ mv BC1GN/fly/test/*gene_list BC1GN/fly/test/genelist
39
+ for f in BC1GN/fly/train/gene_list/*; do cat "$f" >> BC1GN/fly/train/genelist;done
40
+ for f in BC1GN/fly/devtest/gene_lists/*; do cat "$f" >> BC1GN/fly/devtest/genelist;done
41
+ mv BC1GN/mouse/MouseDevTest/ BC1GN/mouse/devtest
42
+ mv BC1GN/mouse/MouseEvaluation/ BC1GN/mouse/test
43
+ mv BC1GN/mouse/MouseNoisyTraining/ BC1GN/mouse/train
44
+ mv BC1GN/mouse/*.list BC1GN/mouse/synonyms.list
45
+ mv BC1GN/mouse/test/*gene_list BC1GN/mouse/test/genelist
46
+ for f in BC1GN/mouse/train/gene_list/*; do cat "$f" >> BC1GN/mouse/train/genelist;done
47
+ for f in BC1GN/mouse/devtest/gene_lists/*; do cat "$f" >> BC1GN/mouse/devtest/genelist;done
48
+ mv BC1GN/yeast/YeastDevTest/ BC1GN/yeast/devtest
49
+ mv BC1GN/yeast/YeastEvaluation/ BC1GN/yeast/test
50
+ mv BC1GN/yeast/YeastNoisyTraining/ BC1GN/yeast/train
51
+ mv BC1GN/yeast/*.list BC1GN/yeast/synonyms.list
52
+ mv BC1GN/yeast/test/*gene_list BC1GN/yeast/test/genelist
53
+ for f in BC1GN/yeast/train/gene_list/*; do cat "$f" >> BC1GN/yeast/train/genelist;done
54
+ for f in BC1GN/yeast/devtest/gene_lists/*; do cat "$f" >> BC1GN/yeast/devtest/genelist;done
55
+ # Fix a bug in the perl script! :-|
56
+ cat BC1GN/task1Bscorer.pl |grep -v 'else {EVALFILE = STDIN;}' >foo; mv foo BC1GN/task1Bscorer.pl
57
+
58
+
59
+
60
+ rm -Rf src
61
+
62
+
63
+
64
+
65
+
66
+
67
+
68
+
69
+
70
+
71
+
72
+
@@ -0,0 +1,26 @@
1
+ wget "http://downloads.sourceforge.net/crfpp/CRF%2B%2B-0.51.tar.gz?modtime=1215793886&big_mirror=0" -O crf++.tar.gz
2
+ tar xvfz crf++.tar.gz
3
+ rm crf++.tar.gz
4
+ cd CRF* || exit 1  # the rm -Rf at the end must only ever run relative to the unpacked tree
5
+ PREFIX=$(dirname "$PWD")
6
+ # Builds CRF++ with the parent directory as prefix; '=' and quoting keep the test valid under plain sh (no shebang here).
7
+ if [ "$(uname -m)" = 'x86_64' ]; then
8
+ WITH_PIC='--with-pic';
9
+ else
10
+ WITH_PIC=''
11
+ fi
12
+
13
+ ./configure --prefix="$PREFIX" --exec-prefix="$PREFIX" $WITH_PIC;
14
+ make install
15
+ cd ruby || exit 1
16
+ # Build the Ruby binding against the files just installed under $PREFIX.
17
+ ruby extconf.rb --with-opt-lib="$PREFIX/lib/" --with-opt-include="$PREFIX/include/"
18
+ make
19
+ cc -shared -o CRFPP.so CRFPP_wrap.o ../../lib/libcrfpp.a -L. -L/usr/lib -L. -rdynamic -Wl,-export-dynamic -lruby -lpthread -lpthread -ldl -lcrypt -lm -lc -lstdc++
20
+ # Keep CRFPP.so in ../../ruby/ and remove the source tree and the installed headers.
21
+ mkdir ../../ruby/
22
+ cp CRFPP.so ../../ruby/
23
+ cd ../../
24
+ rm -Rf CRF* include
25
+
26
+
@@ -0,0 +1,4 @@
1
+ #!/bin/bash
2
+ # Fetches the Entrez Gene tables; each archive is decompressed only when its download succeeded.
3
+ wget ftp://ftp.ncbi.nih.gov/gene/DATA/gene_info.gz && gunzip gene_info.gz
4
+ wget ftp://ftp.ncbi.nih.gov/gene/DATA/gene2pubmed.gz && gunzip gene2pubmed.gz
@@ -0,0 +1,4 @@
1
+ #!/bin/bash
2
+
3
+ wget ftp://ftp.geneontology.org/pub/go/ontology/gene_ontology.obo
4
+ wget http://www.geneontology.org/GO_slims/goslim_generic.obo
@@ -0,0 +1,8 @@
1
+ #!/bin/bash
2
+
3
+ wget http://wishart.biology.ualberta.ca/polysearch/include/disease_IDlist.txt -O disease.txt
4
+ wget http://wishart.biology.ualberta.ca/polysearch/include/organ_ID.txt -O organ.txt
5
+ wget http://wishart.biology.ualberta.ca/polysearch/include/tissue_ID.txt -O tissue.txt
6
+ wget http://wishart.biology.ualberta.ca/polysearch/include/subcellular_localization_ID.txt -O subcellular.txt
7
+ wget http://wishart.biology.ualberta.ca/polysearch/include/drugnames.txt -O drug.txt
8
+ wget http://wishart.biology.ualberta.ca/polysearch/include/HMDBnames.txt -O metabolite.txt