rbbt 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/LICENSE +20 -0
  2. data/README.rdoc +17 -0
  3. data/bin/rbbt_config +180 -0
  4. data/install_scripts/classifier/R/classify.R +36 -0
  5. data/install_scripts/classifier/Rakefile +140 -0
  6. data/install_scripts/get_abner.sh +2 -0
  7. data/install_scripts/get_banner.sh +25 -0
  8. data/install_scripts/get_biocreative.sh +72 -0
  9. data/install_scripts/get_crf++.sh +26 -0
  10. data/install_scripts/get_entrez.sh +4 -0
  11. data/install_scripts/get_go.sh +4 -0
  12. data/install_scripts/get_polysearch.sh +8 -0
  13. data/install_scripts/ner/Rakefile +206 -0
  14. data/install_scripts/ner/config/default.rb +52 -0
  15. data/install_scripts/norm/Rakefile +218 -0
  16. data/install_scripts/norm/config/cue_default.rb +10 -0
  17. data/install_scripts/norm/config/tokens_default.rb +79 -0
  18. data/install_scripts/norm/functions.sh +21 -0
  19. data/install_scripts/organisms/Rakefile +25 -0
  20. data/install_scripts/organisms/cgd.Rakefile +84 -0
  21. data/install_scripts/organisms/human.Rakefile +145 -0
  22. data/install_scripts/organisms/mgi.Rakefile +77 -0
  23. data/install_scripts/organisms/pombe.Rakefile +40 -0
  24. data/install_scripts/organisms/rake-include.rb +258 -0
  25. data/install_scripts/organisms/rgd.Rakefile +88 -0
  26. data/install_scripts/organisms/sgd.Rakefile +66 -0
  27. data/install_scripts/organisms/tair.Rakefile +54 -0
  28. data/install_scripts/organisms/worm.Rakefile +109 -0
  29. data/install_scripts/stopwords +1 -0
  30. data/install_scripts/wordlists/consonants +897 -0
  31. data/install_scripts/wordlists/stopwords +1 -0
  32. data/lib/rbbt/bow/bow.rb +87 -0
  33. data/lib/rbbt/bow/classifier.rb +118 -0
  34. data/lib/rbbt/bow/dictionary.rb +218 -0
  35. data/lib/rbbt/ner/abner.rb +34 -0
  36. data/lib/rbbt/ner/banner.rb +73 -0
  37. data/lib/rbbt/ner/regexpNER.rb +62 -0
  38. data/lib/rbbt/ner/rner.rb +227 -0
  39. data/lib/rbbt/ner/rnorm/cue_index.rb +80 -0
  40. data/lib/rbbt/ner/rnorm/tokens.rb +213 -0
  41. data/lib/rbbt/ner/rnorm.rb +142 -0
  42. data/lib/rbbt/sources/biocreative.rb +75 -0
  43. data/lib/rbbt/sources/biomart.rb +106 -0
  44. data/lib/rbbt/sources/entrez.rb +211 -0
  45. data/lib/rbbt/sources/go.rb +40 -0
  46. data/lib/rbbt/sources/organism.rb +197 -0
  47. data/lib/rbbt/sources/polysearch.rb +88 -0
  48. data/lib/rbbt/sources/pubmed.rb +111 -0
  49. data/lib/rbbt/util/arrayHash.rb +255 -0
  50. data/lib/rbbt/util/filecache.rb +72 -0
  51. data/lib/rbbt/util/index.rb +69 -0
  52. data/lib/rbbt/util/misc.rb +101 -0
  53. data/lib/rbbt/util/open.rb +207 -0
  54. data/lib/rbbt/util/simpleDSL.rb +87 -0
  55. data/lib/rbbt/util/tmpfile.rb +19 -0
  56. data/lib/rbbt/version.rb +10 -0
  57. data/lib/rbbt.rb +86 -0
  58. data/tasks/install.rake +123 -0
  59. metadata +114 -0
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Miguel Vazquez
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,17 @@
1
+ = rbbt
2
+
3
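+ rbbt installs third-party software and databases (Entrez, BioCreative, GO, PolySearch), gathers data for organisms, and builds Named Entity Recognition models for gene mention and function/process classifiers. Run `rbbt_config configure` first to set the data, cache and tmp directories.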
4
+
5
+ == Note on Patches/Pull Requests
6
+
7
+ * Fork the project.
8
+ * Make your feature addition or bug fix.
9
+ * Add tests for it. This is important so I don't break it in a
10
+ future version unintentionally.
11
+ * Commit, do not mess with rakefile, version, or history.
12
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
13
+ * Send me a pull request. Bonus points for topic branches.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2009 Miguel Vazquez. See LICENSE for details.
data/bin/rbbt_config ADDED
@@ -0,0 +1,180 @@
1
+ #!/usr/bin/ruby
2
+
3
+ require 'rubygems'
4
+ require 'rake'
5
+
6
+
7
+ require 'simpleconsole'
8
+
9
+ begin
10
+ require 'rbbt'
11
+ rescue Rbbt::NoConfig
12
+ $noconfig = true
13
+ end
14
+
15
+
16
+ $USAGE =<<EOT
17
+ #{__FILE__} <action> [<subaction>] [--force] [--organism <org>]
18
+ actions:
19
+ * configure: Set paths for data, cache, and tmp directories
20
+
21
+ * install:
22
+ * basic: Third party software
23
+ * databases: Entrez and Biocreative
24
+ * models: Gene Mention and Classification
25
+ * organisms: Rules to gather data for organisms
26
+ * all: 3party wordlists entrez biocreative go ner norm classifier organisms polysearch
27
+
28
+ * update:
29
+ * organisms: Gather data for organisms
30
+ * ner: Build Named Entity Recognition Models for Gene Mention
31
+ * classification:
32
+ Build Function/Process Classifiers
33
+
34
+ * purge_cache: Clean the non-persistent cache, which holds general things
35
+ downloaded using Open.read, like organism identifiers downloaded from
36
+ BioMart. The persistent cache, which hold pubmed articles or entrez gene
37
+ descriptions, is not cleaned, as these are not likely to change
38
+
39
+
40
+ EOT
41
+
42
+ class Controller < SimpleConsole::Controller
43
+
44
+ params :bool => {:f => :force},
45
+ :string => {:o => :organism}
46
+
47
+ def default
48
+ render :action => :usage
49
+ end
50
+
51
+ def help
52
+ render :action => :usage
53
+ end
54
+
55
+ def update
56
+ raise "Run #{__FILE__} configure first to configure rbbt" if $noconfig
57
+
58
+ case params[:id]
59
+ when "organisms"
60
+ @location = File.join(Rbbt.datadir,'organisms')
61
+ when "ner"
62
+ @location = File.join(Rbbt.datadir,'ner')
63
+ when "classifier"
64
+ @location = File.join(Rbbt.datadir,'classifier')
65
+ else
66
+ redirect_to :action => :help, :id => :update
67
+ end
68
+
69
+ $force = true if params[:force]
70
+ $org = params[:organism] if params[:organism]
71
+
72
+ end
73
+
74
+ def install
75
+ raise "Run #{__FILE__} configure first to configure rbbt" if $noconfig
76
+
77
+ case params[:id]
78
+ when "basic"
79
+ @tasks = %w(3party wordlists polysearch)
80
+ when "databases"
81
+ @tasks = %w(entrez biocreative go)
82
+ when "models"
83
+ @tasks = %w(ner norm classifier)
84
+ when "organisms"
85
+ @tasks = %w(organisms)
86
+ when "all"
87
+ @tasks = %w(3party wordlists entrez biocreative go ner norm classifier organisms polysearch)
88
+ when nil
89
+ redirect_to :action => :help, :id => :install
90
+ else
91
+ @tasks = [params[:id]]
92
+ end
93
+
94
+ $force = true if params[:force]
95
+ $org = params[:organism] if params[:organism]
96
+
97
+ end
98
+
99
+ def configure
100
+ end
101
+
102
+ def purge_cache
103
+ end
104
+
105
+ end
106
+
107
+ class View < SimpleConsole::View
108
+ def usage
109
+ puts $USAGE
110
+ end
111
+
112
+ def install
113
+ load File.join(Rbbt.rootdir, 'tasks/install.rake')
114
+
115
+ @tasks.each{|t|
116
+ puts "Invoking #{ t }"
117
+ Rake::Task[t].invoke
118
+ }
119
+ end
120
+
121
+ def update
122
+
123
+ puts "Changing directory to #{@location}"
124
+ chdir @location
125
+
126
+ load "./Rakefile"
127
+
128
+ Rake::Task['default'].invoke
129
+ end
130
+
131
+
132
+ def configure
133
+
134
+ defaultdir = File.join(ENV['HOME'],'rbbt')
135
+
136
+ cachedir = File.join(defaultdir, 'cache')
137
+ tmpdir = File.join(defaultdir, 'tmp')
138
+ datadir = File.join(defaultdir, 'data')
139
+
140
+ puts "Please indicate where you wish to place the data directories"
141
+ puts
142
+
143
+ puts
144
+ puts "* Cache Directory: This directory will hold downloads, from PubMed,
145
+ Entrez and other, for local store. It might grow considerably."
146
+ print "[#{ cachedir }]? "
147
+ input = STDIN.gets
148
+ cachedir = input if input =~ /\w/
149
+
150
+ puts
151
+ puts "* Tmp Directory: Temporary files."
152
+ print "[#{ tmpdir }]? "
153
+ input = STDIN.gets
154
+ tmpdir = input if input =~ /\w/
155
+
156
+ puts
157
+ puts "* Data Directory: Holds data from organisms, databases, third party software, etc."
158
+ print "[#{ datadir }]? "
159
+ input = STDIN.gets
160
+ datadir = input if input =~ /\w/
161
+
162
+
163
+
164
+ fout = File.open(File.join(ENV['HOME'], '.rbbt'),'w')
165
+ fout.puts "cachedir: #{cachedir}"
166
+ fout.puts "tmpdir: #{tmpdir}"
167
+ fout.puts "datadir: #{datadir}"
168
+ fout.close
169
+
170
+ end
171
+
172
+ def purge_cache
173
+ FileUtils.rm Dir.glob(File.join(Rbbt.cachedir,'open-remote','*'))
174
+ end
175
+
176
+ end
177
+
178
+ SimpleConsole::Application.run(ARGV, Controller, View)
179
+
180
+
@@ -0,0 +1,36 @@
1
+ library('e1071')
2
+
3
+ BOW.norm <- function(x, weights = NULL){
4
+ x = 1 + log(x);
5
+ x[x==-Inf] = 0;
6
+ x.sum = as.matrix(x) %*% matrix(1,nrow=dim(x)[2],ncol=1);
7
+ x.sum = matrix(100/x.sum,nrow=length(x.sum),ncol=dim(x)[2]);
8
+ x.norm = x * x.sum;
9
+ rm(x.sum);
10
+ x.norm[is.na(x.norm)] = 0
11
+
12
+ if (!is.null(weights)){
13
+ x.norm = x.norm * matrix(abs(weights),ncol=length(weights),nrow=dim(x.norm)[1],byrow=T)
14
+ }
15
+
16
+ x.norm;
17
+ }
18
+
19
+
20
+ BOW.classification.model <- function(features, modelfile, dictfile = NULL){
21
+ feats = read.table(features, sep="\t", header=T, row.names=1);
22
+
23
+ if (!is.null(dictfile)){
24
+ svm.weights = read.table(file=dictfile, sep="\t")[2];
25
+ }else {
26
+ svm.weights = NULL;
27
+ }
28
+ feats[-1] = BOW.norm(feats[-1], svm.weights);
29
+ svm.model = svm(Class ~ ., data=feats, svm.weights);
30
+ save(svm.model,svm.weights, file=modelfile);
31
+ }
32
+
33
+ BOW.classification.classify <- function(modelfile, x, weights = NULL){
34
+ x = BOW.norm(x, weights);
35
+ predict(modelfile, x);
36
+ }
@@ -0,0 +1,140 @@
1
+ require 'rbbt'
2
+ require 'rbbt/sources/organism'
3
+ require 'rbbt/sources/pubmed'
4
+ require 'rbbt/bow/bow'
5
+ require 'rbbt/bow/dictionary'
6
+ require 'rbbt/bow/classifier'
7
+ require 'rbbt/util/misc'
8
+
9
+ require 'progress-monitor'
10
+ require 'rand'
11
+
12
+ $hi = ENV['hi'] || 0.8
13
+ $low = ENV['low'] || 0.01
14
+ $max = ENV['max'] || 3000
15
+ $bigrams = ENV['bigrams'] == 'true' || false
16
+
17
+ $ndocs = ENV['ndocs'] || 5000
18
+
19
+ desc "Bilds Dictionary and Features for an organism"
20
+ rule(/data\/(.*)/) do |t|
21
+ org = File.basename(t.name)
22
+
23
+ go = Organism.gene_literature_go(org).collect{|gene, pmids| pmids}.flatten.uniq
24
+ all = Organism.literature(org).flatten.uniq - go
25
+
26
+ ndocs = [go.length, all.length, $ndocs.to_i].min
27
+ puts "Using #{ ndocs } from each class\n\n"
28
+
29
+ go = go.shuffle[0..ndocs - 1]
30
+ all = all.shuffle[0..ndocs - 1]
31
+
32
+ dict = Dictionary::KL.new
33
+
34
+
35
+
36
+ chunks = all.chunk(50)
37
+ Progress.monitor("Building Dictionary for #{ org }: -",1000)
38
+ chunks.each{|chunk|
39
+ PubMed.get_article(chunk).each{|pmid, article|
40
+ words = BagOfWords.terms(article.text,$bigrams)
41
+ dict.add(words, :-)
42
+ }
43
+ }
44
+
45
+ chunks = go.chunk(50)
46
+ Progress.monitor("Building Dictionary for #{ org }: +",1000)
47
+ chunks.each{|chunk|
48
+ PubMed.get_article(chunk).each{|pmid, article|
49
+ words = BagOfWords.terms(article.text,$bigrams)
50
+ dict.add(words, :+)
51
+ }
52
+ }
53
+
54
+ term_weigths = dict.weights(:low => $low.to_f, :hi => $hi.to_f, :limit => $max.to_i)
55
+ Open.write(t.name + '.dict', term_weigths.sort.collect{|p| p.join("\t")}.join("\n"))
56
+
57
+ terms = term_weigths.keys.sort
58
+
59
+ fout = File.open(t.name, 'w')
60
+ fout.puts((['Name','Class'] + terms).join("\t"))
61
+
62
+ Progress.monitor("Building Features for #{ org }", 1000)
63
+ all.each{|pmid|
64
+ text = PubMed.get_article(pmid).text
65
+ fout.puts(([pmid, :-] + BagOfWords.features(text, terms)).join("\t"))
66
+ }
67
+ go.each{|pmid|
68
+ text = PubMed.get_article(pmid).text
69
+ fout.puts(([pmid, :+] + BagOfWords.features(text, terms)).join("\t"))
70
+ }
71
+
72
+
73
+ fout.close
74
+ end
75
+
76
+ rule (/model\/(.*)/) => lambda{|n| n.sub(/model/,'data')} do |t|
77
+ features = t.name.sub(/model/,'data')
78
+ Classifier.create_model(features, t.name, features + '.dict')
79
+ end
80
+
81
+ rule (/results\/(.*)/) => lambda{|n| n.sub(/results/,'model')} do |t|
82
+ model = t.name.sub(/results/,'model')
83
+ features = t.name.sub(/results/,'data')
84
+ org = File.basename(t.name)
85
+
86
+ ndocs = 100
87
+
88
+ used = Open.read(features).collect{|l| l.chomp.split(/\t/).first}[1..-1]
89
+
90
+ classifier = Classifier.new(model)
91
+ go = Organism.gene_literature_go(org).collect{|gene, pmids| pmids}.flatten.uniq - used
92
+ all = Organism.literature(org).flatten.uniq - go - used
93
+
94
+ go = go.shuffle[0..ndocs - 1]
95
+ all = all.shuffle[0..ndocs - 1]
96
+
97
+ ndocs = go.length + all.length
98
+
99
+ raise "Not enogh unused articles to evaluate" if go.empty? || all.empty?
100
+
101
+ features_go = PubMed.get_article(go).collect{|pmid, article|
102
+ article = article.text
103
+ }
104
+ pos = classifier.classify(features_go).select{|v| v == '+'}.length
105
+
106
+ features_all = PubMed.get_article(all).collect{|pmid, article|
107
+ article = article.text
108
+ }
109
+ neg = classifier.classify(features_all).select{|v| v == '-'}.length
110
+
111
+ puts "#{ pos } #{ neg }"
112
+
113
+ precision = (pos + neg) / (ndocs).to_f
114
+ recall = pos / go.length.to_f
115
+ f1 = ( 2 * precision * recall) / (precision + recall ).to_f
116
+
117
+ puts "Precision: #{ precision}, Recall: #{ recall }, F1: #{f1}"
118
+ end
119
+
120
+ task 'clean' do
121
+ FileUtils.rm Dir.glob("data/*")
122
+ FileUtils.rm Dir.glob("model/*")
123
+ FileUtils.rm Dir.glob("results/*")
124
+
125
+ end
126
+ task 'all' do
127
+ Organism.all.each{|org|
128
+ Rake::Task["model/#{ org }"].invoke
129
+ }
130
+ end
131
+ task 'update' do
132
+ if $org
133
+ FileUtils.rm Dir.glob("**/#{$org}.*") if $force
134
+ Rake::Task["model/#{$org}"].invoke
135
+ else
136
+ Rake::Task['clean'].invoke if $force
137
+ Rake::Task['all'].invoke
138
+ end
139
+ end
140
+
@@ -0,0 +1,2 @@
1
+ #!/bin/bash
2
+ wget http://pages.cs.wisc.edu/~bsettles/abner/abner.jar
@@ -0,0 +1,25 @@
1
+ #!/bin/bash
2
+ # Name the downloads explicitly; the URL query string can otherwise end up in the saved file name.
3
+ wget -O BANNER.zip "http://downloads.sourceforge.net/banner/BANNER_v02.zip?modtime=1196955449&big_mirror=0"
4
+ wget -O gene_model.bin "http://downloads.sourceforge.net/banner/gene_model_v02.bin?modtime=1196955509&big_mirror=0"
5
+ # Unpack BANNER, compile its sources against the jars under libs/ and pack everything into banner.jar.
6
+ # A failed cd aborts the script so the later "cd .." and "rm -Rf" steps never run in another directory.
7
+ unzip BANNER.zip
8
+ cd BANNER || exit 1
9
+ libs=`find libs/ -name "*.jar"`
10
+ mkdir classes
11
+ javac -classpath `echo $libs|sed s/\ /:/g` -d classes `find src/ -name "*.java"`
12
+ cd classes || exit 1
13
+ for f in ../libs/*.jar; do jar xf "$f";done
14
+ jar cf banner.jar *
15
+ mv banner.jar ../..
16
+ cd ..
17
+ cp -R nlpdata/ ../
18
+ cd ..
19
+ rm BANNER.zip
20
+ rm -Rf BANNER
21
+
22
+
23
+
24
+
25
+
@@ -0,0 +1,72 @@
1
+ #!/bin/bash
2
+
3
+ mkdir src
4
+ cd src
5
+ wget "http://garr.dl.sourceforge.net/sourceforge/biocreative/bc2GNandGMgold_Subs.tar.gz"
6
+ wget "http://switch.dl.sourceforge.net/sourceforge/biocreative/biocreative1task1a.tar.gz"
7
+ wget "http://kent.dl.sourceforge.net/sourceforge/biocreative/biocreative1task1b.tar.gz"
8
+ wget "http://mesh.dl.sourceforge.net/sourceforge/biocreative/biocreative1task2.tar.gz"
9
+ wget "http://garr.dl.sourceforge.net/sourceforge/biocreative/bc2geneMention.tar.gz"
10
+ wget "http://switch.dl.sourceforge.net/sourceforge/biocreative/bc2normal.1.4.tar.gz"
11
+ wget "http://kent.dl.sourceforge.net/sourceforge/biocreative/bc2GNtest.zip"
12
+
13
+ for f in *.gz; do tar xfz $f; done
14
+ unzip bc2GNtest.zip
15
+
16
+ cd ..
17
+
18
+ mkdir BC2GM
19
+ cp -R src/bc2geneMention/train/ BC2GM/
20
+ cp -R src/sourceforgeDistrib-22-Sept-07/genemention/BC2GM/test/ BC2GM/
21
+ mv BC2GM/train/alt_eval.perl BC2GM/
22
+
23
+ mkdir BC2GN
24
+ cp -R src/biocreative2normalization/* BC2GN/
25
+ mv BC2GN/noisyTrainingData/ BC2GN/NoisyTrain
26
+ mv BC2GN/trainingData/ BC2GN/Train
27
+ cp -R src/bc2GNtest/bc2GNtestdocs/ BC2GN/Test
28
+ mv BC2GN/NoisyTrain/noisytrain.genelist BC2GN/NoisyTrain/genelist
29
+ mv BC2GN/Train/training.genelist BC2GN/Train/genelist
30
+ cp src/sourceforgeDistrib-22-Sept-07/genenormalization/bc2test.genelist BC2GN/Test/genelist
31
+
32
+ mkdir BC1GN
33
+ cp -R src/biocreative1/bc1task1b/* BC1GN/
34
+ mv BC1GN/fly/FlyDevTest/ BC1GN/fly/devtest
35
+ mv BC1GN/fly/FlyEvaluation/ BC1GN/fly/test
36
+ mv BC1GN/fly/FlyNoisyTraining/ BC1GN/fly/train
37
+ mv BC1GN/fly/*.list BC1GN/fly/synonyms.list
38
+ mv BC1GN/fly/test/*gene_list BC1GN/fly/test/genelist
39
+ for f in BC1GN/fly/train/gene_list/*; do cat "$f" >> BC1GN/fly/train/genelist;done
40
+ for f in BC1GN/fly/devtest/gene_lists/*; do cat "$f" >> BC1GN/fly/devtest/genelist;done
41
+ mv BC1GN/mouse/MouseDevTest/ BC1GN/mouse/devtest
42
+ mv BC1GN/mouse/MouseEvaluation/ BC1GN/mouse/test
43
+ mv BC1GN/mouse/MouseNoisyTraining/ BC1GN/mouse/train
44
+ mv BC1GN/mouse/*.list BC1GN/mouse/synonyms.list
45
+ mv BC1GN/mouse/test/*gene_list BC1GN/mouse/test/genelist
46
+ for f in BC1GN/mouse/train/gene_list/*; do cat "$f" >> BC1GN/mouse/train/genelist;done
47
+ for f in BC1GN/mouse/devtest/gene_lists/*; do cat "$f" >> BC1GN/mouse/devtest/genelist;done
48
+ mv BC1GN/yeast/YeastDevTest/ BC1GN/yeast/devtest
49
+ mv BC1GN/yeast/YeastEvaluation/ BC1GN/yeast/test
50
+ mv BC1GN/yeast/YeastNoisyTraining/ BC1GN/yeast/train
51
+ mv BC1GN/yeast/*.list BC1GN/yeast/synonyms.list
52
+ mv BC1GN/yeast/test/*gene_list BC1GN/yeast/test/genelist
53
+ for f in BC1GN/yeast/train/gene_list/*; do cat "$f" >> BC1GN/yeast/train/genelist;done
54
+ for f in BC1GN/yeast/devtest/gene_lists/*; do cat "$f" >> BC1GN/yeast/devtest/genelist;done
55
+ # Fix a bug in the perl script! :-|
56
+ cat BC1GN/task1Bscorer.pl |grep -v 'else {EVALFILE = STDIN;}' >foo; mv foo BC1GN/task1Bscorer.pl
57
+
58
+
59
+
60
+ rm -Rf src
61
+
62
+
63
+
64
+
65
+
66
+
67
+
68
+
69
+
70
+
71
+
72
+
@@ -0,0 +1,26 @@
1
+ wget "http://downloads.sourceforge.net/crfpp/CRF%2B%2B-0.51.tar.gz?modtime=1215793886&big_mirror=0" -O crf++.tar.gz
2
+ tar xvfz crf++.tar.gz
3
+ rm crf++.tar.gz
4
+ cd CRF* || exit 1
5
+ PREFIX=$(dirname "$PWD")
6
+ # This script has no shebang, so keep the test portable: "=" instead of the bash-only "==".
7
+ if [ "$(uname -m)" = 'x86_64' ]; then
8
+ WITH_PIC='--with-pic';
9
+ else
10
+ WITH_PIC=''
11
+ fi
12
+
13
+ ./configure --prefix="$PREFIX" --exec-prefix="$PREFIX" $WITH_PIC;
14
+ make install
15
+ cd ruby || exit 1
16
+ # Build the Ruby binding against the library installed under $PREFIX.
17
+ ruby extconf.rb --with-opt-lib="$PREFIX/lib/" --with-opt-include="$PREFIX/include/"
18
+ make
19
+ cc -shared -o CRFPP.so CRFPP_wrap.o ../../lib/libcrfpp.a -L. -L/usr/lib -L. -rdynamic -Wl,-export-dynamic -lruby -lpthread -lpthread -ldl -lcrypt -lm -lc -lstdc++
20
+ # The cd guards above make sure the cleanup below only ever runs from the download directory.
21
+ mkdir ../../ruby/
22
+ cp CRFPP.so ../../ruby/
23
+ cd ../../
24
+ rm -Rf CRF* include
25
+
26
+
@@ -0,0 +1,4 @@
1
+ #!/bin/bash
2
+
3
+ wget ftp://ftp.ncbi.nih.gov/gene/DATA/gene_info.gz; gunzip gene_info.gz
4
+ wget ftp://ftp.ncbi.nih.gov/gene/DATA/gene2pubmed.gz; gunzip gene2pubmed.gz
@@ -0,0 +1,4 @@
1
+ #!/bin/bash
2
+
3
+ wget ftp://ftp.geneontology.org/pub/go/ontology/gene_ontology.obo
4
+ wget http://www.geneontology.org/GO_slims/goslim_generic.obo
@@ -0,0 +1,8 @@
1
+ #!/bin/bash
2
+
3
+ wget http://wishart.biology.ualberta.ca/polysearch/include/disease_IDlist.txt -O disease.txt
4
+ wget http://wishart.biology.ualberta.ca/polysearch/include/organ_ID.txt -O organ.txt
5
+ wget http://wishart.biology.ualberta.ca/polysearch/include/tissue_ID.txt -O tissue.txt
6
+ wget http://wishart.biology.ualberta.ca/polysearch/include/subcellular_localization_ID.txt -O subcellular.txt
7
+ wget http://wishart.biology.ualberta.ca/polysearch/include/drugnames.txt -O drug.txt
8
+ wget http://wishart.biology.ualberta.ca/polysearch/include/HMDBnames.txt -O metabolite.txt