rbbt 1.1.7 → 2.0.0

Files changed (60)
  1. checksums.yaml +7 -0
  2. data/README.rdoc +2 -138
  3. metadata +72 -136
  4. data/LICENSE +0 -20
  5. data/bin/rbbt_config +0 -246
  6. data/install_scripts/classifier/R/classify.R +0 -36
  7. data/install_scripts/classifier/Rakefile +0 -145
  8. data/install_scripts/get_abner.sh +0 -2
  9. data/install_scripts/get_banner.sh +0 -25
  10. data/install_scripts/get_biocreative.sh +0 -72
  11. data/install_scripts/get_crf++.sh +0 -26
  12. data/install_scripts/get_entrez.sh +0 -4
  13. data/install_scripts/get_go.sh +0 -4
  14. data/install_scripts/get_polysearch.sh +0 -8
  15. data/install_scripts/ner/Rakefile +0 -206
  16. data/install_scripts/ner/config/default.rb +0 -52
  17. data/install_scripts/norm/Rakefile +0 -219
  18. data/install_scripts/norm/config/cue_default.rb +0 -10
  19. data/install_scripts/norm/config/tokens_default.rb +0 -79
  20. data/install_scripts/norm/functions.sh +0 -23
  21. data/install_scripts/organisms/Rakefile +0 -43
  22. data/install_scripts/organisms/cgd.Rakefile +0 -84
  23. data/install_scripts/organisms/human.Rakefile +0 -145
  24. data/install_scripts/organisms/mgi.Rakefile +0 -77
  25. data/install_scripts/organisms/pombe.Rakefile +0 -40
  26. data/install_scripts/organisms/rake-include.rb +0 -258
  27. data/install_scripts/organisms/rgd.Rakefile +0 -88
  28. data/install_scripts/organisms/sgd.Rakefile +0 -66
  29. data/install_scripts/organisms/tair.Rakefile +0 -54
  30. data/install_scripts/organisms/worm.Rakefile +0 -109
  31. data/install_scripts/wordlists/consonants +0 -897
  32. data/install_scripts/wordlists/stopwords +0 -1
  33. data/lib/rbbt.rb +0 -86
  34. data/lib/rbbt/bow/bow.rb +0 -88
  35. data/lib/rbbt/bow/classifier.rb +0 -116
  36. data/lib/rbbt/bow/dictionary.rb +0 -187
  37. data/lib/rbbt/ner/abner.rb +0 -34
  38. data/lib/rbbt/ner/banner.rb +0 -73
  39. data/lib/rbbt/ner/dictionaryNER.rb +0 -98
  40. data/lib/rbbt/ner/regexpNER.rb +0 -70
  41. data/lib/rbbt/ner/rner.rb +0 -227
  42. data/lib/rbbt/ner/rnorm.rb +0 -143
  43. data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
  44. data/lib/rbbt/ner/rnorm/tokens.rb +0 -213
  45. data/lib/rbbt/sources/biocreative.rb +0 -75
  46. data/lib/rbbt/sources/biomart.rb +0 -105
  47. data/lib/rbbt/sources/entrez.rb +0 -211
  48. data/lib/rbbt/sources/go.rb +0 -40
  49. data/lib/rbbt/sources/organism.rb +0 -245
  50. data/lib/rbbt/sources/polysearch.rb +0 -117
  51. data/lib/rbbt/sources/pubmed.rb +0 -111
  52. data/lib/rbbt/util/arrayHash.rb +0 -255
  53. data/lib/rbbt/util/filecache.rb +0 -72
  54. data/lib/rbbt/util/index.rb +0 -47
  55. data/lib/rbbt/util/misc.rb +0 -106
  56. data/lib/rbbt/util/open.rb +0 -235
  57. data/lib/rbbt/util/rake.rb +0 -183
  58. data/lib/rbbt/util/simpleDSL.rb +0 -87
  59. data/lib/rbbt/util/tmpfile.rb +0 -19
  60. data/tasks/install.rake +0 -124
data/install_scripts/classifier/R/classify.R
@@ -1,36 +0,0 @@
- library('e1071')
-
- BOW.norm <- function(x, weights = NULL){
- x = 1 + log(x);
- x[x==-Inf] = 0;
- x.sum = as.matrix(x) %*% matrix(1,nrow=dim(x)[2],ncol=1);
- x.sum = matrix(100/x.sum,nrow=length(x.sum),ncol=dim(x)[2]);
- x.norm = x * x.sum;
- rm(x.sum);
- x.norm[is.na(x.norm)] = 0
-
- if (!is.null(weights)){
- x.norm = x.norm * matrix(abs(weights),ncol=length(weights),nrow=dim(x.norm)[1],byrow=T)
- }
-
- x.norm;
- }
-
-
- BOW.classification.model <- function(features, modelfile, dictfile = NULL){
- feats = read.table(features, sep="\t", header=T, row.names=1);
-
- if (!is.null(dictfile)){
- svm.weights = read.table(file=dictfile, sep="\t")[2];
- }else {
- svm.weights = NULL;
- }
- feats[-1] = BOW.norm(feats[-1], svm.weights);
- svm.model = svm(Class ~ ., data=feats, svm.weights);
- save(svm.model,svm.weights, file=modelfile);
- }
-
- BOW.classification.classify <- function(modelfile, x, weights = NULL){
- x = BOW.norm(x, weights);
- predict(modelfile, x);
- }
data/install_scripts/classifier/Rakefile
@@ -1,145 +0,0 @@
- require 'rbbt'
- require 'rbbt/sources/organism'
- require 'rbbt/sources/pubmed'
- require 'rbbt/bow/bow'
- require 'rbbt/bow/dictionary'
- require 'rbbt/bow/classifier'
- require 'rbbt/util/misc'
-
- require 'progress-monitor'
- require 'rand'
-
- $hi = ENV['hi'] || 0.8
- $low = ENV['low'] || 0.01
- $max = ENV['max'] || 3000
- $bigrams = ENV['bigrams'] == 'true' || false
-
- $ndocs = ENV['ndocs'] || 5000
-
- desc "Bilds Dictionary and Features for an organism"
- rule(/data\/(.*)/) do |t|
- org = File.basename(t.name)
-
- go = Organism.gene_literature_go(org).collect{|gene, pmids| pmids}.flatten.uniq
- all = Organism.literature(org).flatten.uniq - go
-
- ndocs = [go.length, all.length, $ndocs.to_i].min
- puts "Using #{ ndocs } from each class\n\n"
-
- go = go.shuffle[0..ndocs - 1]
- all = all.shuffle[0..ndocs - 1]
-
- dict = Dictionary::KL.new
-
-
-
- chunks = all.chunk(50)
- Progress.monitor("Building Dictionary for #{ org }: -",1000)
- chunks.each{|chunk|
- PubMed.get_article(chunk).each{|pmid, article|
- words = BagOfWords.terms(article.text,$bigrams)
- dict.add(words, :-)
- }
- }
-
- chunks = go.chunk(50)
- Progress.monitor("Building Dictionary for #{ org }: +",1000)
- chunks.each{|chunk|
- PubMed.get_article(chunk).each{|pmid, article|
- words = BagOfWords.terms(article.text,$bigrams)
- dict.add(words, :+)
- }
- }
-
- term_weigths = dict.weights(:low => $low.to_f, :hi => $hi.to_f, :limit => $max.to_i)
- Open.write(t.name + '.dict', term_weigths.sort.collect{|p| p.join("\t")}.join("\n"))
-
- terms = term_weigths.keys.sort
-
- fout = File.open(t.name, 'w')
- fout.puts((['Name','Class'] + terms).join("\t"))
-
- Progress.monitor("Building Features for #{ org }", 1000)
- all.each{|pmid|
- text = PubMed.get_article(pmid).text
- fout.puts(([pmid, :-] + BagOfWords.features(text, terms)).join("\t"))
- }
- go.each{|pmid|
- text = PubMed.get_article(pmid).text
- fout.puts(([pmid, :+] + BagOfWords.features(text, terms)).join("\t"))
- }
-
-
- fout.close
- end
-
- rule (/model\/(.*)/) => lambda{|n| n.sub(/model/,'data')} do |t|
- features = t.name.sub(/model/,'data')
- Classifier.create_model(features, t.name, features + '.dict')
- end
-
- rule (/results\/(.*)/) => lambda{|n| n.sub(/results/,'model')} do |t|
- model = t.name.sub(/results/,'model')
- features = t.name.sub(/results/,'data')
- org = File.basename(t.name)
-
- ndocs = 100
-
- used = []
- if "".respond_to? :collect
- used = Open.read(features).collect{|l| l.chomp.split(/\t/).first}[1..-1]
- else
- used = Open.read(features).lines.collect{|l| l.chomp.split(/\t/).first}[1..-1]
- end
-
- classifier = Classifier.new(model)
- go = Organism.gene_literature_go(org).collect{|gene, pmids| pmids}.flatten.uniq - used
- all = Organism.literature(org).flatten.uniq - go - used
-
- go = go.shuffle[0..ndocs - 1]
- all = all.shuffle[0..ndocs - 1]
-
- ndocs = go.length + all.length
-
- raise "Not enogh unused articles to evaluate" if go.empty? || all.empty?
-
- features_go = PubMed.get_article(go).collect{|pmid, article|
- article = article.text
- }
- pos = classifier.classify(features_go).select{|v| v == '+'}.length
-
- features_all = PubMed.get_article(all).collect{|pmid, article|
- article = article.text
- }
- neg = classifier.classify(features_all).select{|v| v == '-'}.length
-
- puts "#{ pos } #{ neg }"
-
- precision = (pos + neg) / (ndocs).to_f
- recall = pos / go.length.to_f
- f1 = ( 2 * precision * recall) / (precision + recall ).to_f
-
- puts "Precision: #{ precision}, Recall: #{ recall }, F1: #{f1}"
- end
-
- task 'clean' do
- FileUtils.rm Dir.glob("data/*")
- FileUtils.rm Dir.glob("model/*")
- FileUtils.rm Dir.glob("results/*")
-
- end
- task 'all' do
- Organism.all.each{|org|
- Rake::Task["model/#{ org }"].invoke
- }
- end
- task 'update' do
- if $org
- FileUtils.rm Dir.glob("**/#{$org}.*") if $force
- Rake::Task["model/#{$org}"].invoke
- else
- Rake::Task['clean'].invoke if $force
- Rake::Task['all'].invoke
- end
- end
-
data/install_scripts/get_abner.sh
@@ -1,2 +0,0 @@
- #!/bin/bash
- wget http://pages.cs.wisc.edu/~bsettles/abner/abner.jar
data/install_scripts/get_banner.sh
@@ -1,25 +0,0 @@
- #!/bin/bash
-
- wget "http://downloads.sourceforge.net/banner/BANNER_v02.zip?modtime=1196955449&big_mirror=0"
- wget "http://downloads.sourceforge.net/banner/gene_model_v02.bin?modtime=1196955509&big_mirror=0"
- mv BANNER_v02.zip BANNER.zip
- mv gene_model_v02.bin gene_model.bin
- unzip BANNER.zip
- cd BANNER
- libs=`find libs/ -name "*.jar"`
- mkdir classes
- javac -classpath `echo $libs|sed s/\ /:/g` -d classes `find src/ -name "*.java"`
- cd classes
- for f in ../libs/*.jar; do jar xf "$f";done
- jar cf banner.jar *
- mv banner.jar ../..
- cd ..
- cp -R nlpdata/ ../
- cd ..
- rm BANNER.zip
- rm -Rf BANNER
-
-
-
-
-
data/install_scripts/get_biocreative.sh
@@ -1,72 +0,0 @@
- #!/bin/bash
-
- mkdir src
- cd src
- wget "http://garr.dl.sourceforge.net/sourceforge/biocreative/bc2GNandGMgold_Subs.tar.gz"
- wget "http://switch.dl.sourceforge.net/sourceforge/biocreative/biocreative1task1a.tar.gz"
- wget "http://kent.dl.sourceforge.net/sourceforge/biocreative/biocreative1task1b.tar.gz"
- wget "http://mesh.dl.sourceforge.net/sourceforge/biocreative/biocreative1task2.tar.gz"
- wget "http://garr.dl.sourceforge.net/sourceforge/biocreative/bc2geneMention.tar.gz"
- wget "http://switch.dl.sourceforge.net/sourceforge/biocreative/bc2normal.1.4.tar.gz"
- wget "http://kent.dl.sourceforge.net/sourceforge/biocreative/bc2GNtest.zip"
-
- for f in *.gz; do tar xfz $f; done
- unzip bc2GNtest.zip
-
- cd ..
-
- mkdir BC2GM
- cp -R src/bc2geneMention/train/ BC2GM/
- cp -R src/sourceforgeDistrib-22-Sept-07/genemention/BC2GM/test/ BC2GM/
- mv BC2GM/train/alt_eval.perl BC2GM/
-
- mkdir BC2GN
- cp -R src/biocreative2normalization/* BC2GN/
- mv BC2GN/noisyTrainingData/ BC2GN/NoisyTrain
- mv BC2GN/trainingData/ BC2GN/Train
- cp -R src/bc2GNtest/bc2GNtestdocs/ BC2GN/Test
- mv BC2GN/NoisyTrain/noisytrain.genelist BC2GN/NoisyTrain/genelist
- mv BC2GN/Train/training.genelist BC2GN/Train/genelist
- cp src/sourceforgeDistrib-22-Sept-07/genenormalization/bc2test.genelist BC2GN/Test/genelist
-
- mkdir BC1GN
- cp -R src/biocreative1/bc1task1b/* BC1GN/
- mv BC1GN/fly/FlyDevTest/ BC1GN/fly/devtest
- mv BC1GN/fly/FlyEvaluation/ BC1GN/fly/test
- mv BC1GN/fly/FlyNoisyTraining/ BC1GN/fly/train
- mv BC1GN/fly/*.list BC1GN/fly/synonyms.list
- mv BC1GN/fly/test/*gene_list BC1GN/fly/test/genelist
- for f in BC1GN/fly/train/gene_list/*; do cat "$f" >> BC1GN/fly/train/genelist;done
- for f in BC1GN/fly/devtest/gene_lists/*; do cat "$f" >> BC1GN/fly/devtest/genelist;done
- mv BC1GN/mouse/MouseDevTest/ BC1GN/mouse/devtest
- mv BC1GN/mouse/MouseEvaluation/ BC1GN/mouse/test
- mv BC1GN/mouse/MouseNoisyTraining/ BC1GN/mouse/train
- mv BC1GN/mouse/*.list BC1GN/mouse/synonyms.list
- mv BC1GN/mouse/test/*gene_list BC1GN/mouse/test/genelist
- for f in BC1GN/mouse/train/gene_list/*; do cat "$f" >> BC1GN/mouse/train/genelist;done
- for f in BC1GN/mouse/devtest/gene_lists/*; do cat "$f" >> BC1GN/mouse/devtest/genelist;done
- mv BC1GN/yeast/YeastDevTest/ BC1GN/yeast/devtest
- mv BC1GN/yeast/YeastEvaluation/ BC1GN/yeast/test
- mv BC1GN/yeast/YeastNoisyTraining/ BC1GN/yeast/train
- mv BC1GN/yeast/*.list BC1GN/yeast/synonyms.list
- mv BC1GN/yeast/test/*gene_list BC1GN/yeast/test/genelist
- for f in BC1GN/yeast/train/gene_list/*; do cat "$f" >> BC1GN/yeast/train/genelist;done
- for f in BC1GN/yeast/devtest/gene_lists/*; do cat "$f" >> BC1GN/yeast/devtest/genelist;done
- # Fix a bug in the perl script! :-|
- cat BC1GN/task1Bscorer.pl |grep -v 'else {EVALFILE = STDIN;}' >foo; mv foo BC1GN/task1Bscorer.pl
-
-
-
- rm -Rf src
-
-
-
-
-
-
-
-
-
-
-
-
data/install_scripts/get_crf++.sh
@@ -1,26 +0,0 @@
- wget "http://downloads.sourceforge.net/crfpp/CRF%2B%2B-0.51.tar.gz?modtime=1215793886&big_mirror=0" -O crf++.tar.gz
- tar xvfz crf++.tar.gz
- rm crf++.tar.gz
- cd CRF*
- PREFIX=$(dirname $PWD)
-
- if [ `uname -m` == 'x86_64' ]; then
- WITH_PIC='--with-pic';
- else
- WITH_PIC=''
- fi
-
- ./configure --prefix=$PREFIX --exec-prefix=$PREFIX $WITH_PIC;
- make install
- cd ruby
-
- ruby extconf.rb --with-opt-lib=$PREFIX/lib/ --with-opt-include=$PREFIX/include/
- make
- cc -shared -o CRFPP.so CRFPP_wrap.o ../../lib/libcrfpp.a -L. -L/usr/lib -L. -rdynamic -Wl,-export-dynamic -lruby -lpthread -lpthread -ldl -lcrypt -lm -lc -lstdc++
-
- mkdir ../../ruby/
- cp CRFPP.so ../../ruby/
- cd ../../
- rm -Rf CRF* include
-
-
data/install_scripts/get_entrez.sh
@@ -1,4 +0,0 @@
- #!/bin/bash
-
- wget ftp://ftp.ncbi.nih.gov/gene/DATA/gene_info.gz; gunzip gene_info.gz
- wget ftp://ftp.ncbi.nih.gov/gene/DATA/gene2pubmed.gz; gunzip gene2pubmed.gz
data/install_scripts/get_go.sh
@@ -1,4 +0,0 @@
- #!/bin/bash
-
- wget ftp://ftp.geneontology.org/pub/go/ontology/gene_ontology.obo
- wget http://www.geneontology.org/GO_slims/goslim_generic.obo
data/install_scripts/get_polysearch.sh
@@ -1,8 +0,0 @@
- #!/bin/bash
-
- wget http://wishart.biology.ualberta.ca/polysearch/include/disease_IDlist.txt -O disease.txt
- wget http://wishart.biology.ualberta.ca/polysearch/include/organ_ID.txt -O organ.txt
- wget http://wishart.biology.ualberta.ca/polysearch/include/tissue_ID.txt -O tissue.txt
- wget http://wishart.biology.ualberta.ca/polysearch/include/subcellular_localization_ID.txt -O subcellular.txt
- wget http://wishart.biology.ualberta.ca/polysearch/include/drugnames.txt -O drug.txt
- wget http://wishart.biology.ualberta.ca/polysearch/include/HMDBnames.txt -O metabolite.txt
data/install_scripts/ner/Rakefile
@@ -1,206 +0,0 @@
- require 'rbbt/sources/organism'
- require 'rbbt/sources/biocreative'
- require 'rbbt/ner/rner'
-
- require 'progress-monitor'
-
-
- $type = ENV['type'] || 'rner'
-
- #{{{ FEATURES
-
- def BC2GM_features(dataset, outfile)
- data = Biocreative.BC2GM(dataset)
-
- fout = File.open(outfile,'w')
- parser = NERFeatures.new
-
- Progress.monitor("CRFPP Features BC2GM #{ dataset }")
- data.each{|code, info|
- text = info[:text]
- mentions = info[:mentions]
-
- features = parser.tagged_features(text,mentions)
-
- features.each{|feat|
- fout.puts feat.join(" ")
- }
- fout.puts
- }
- fout.close
- end
-
- def BC2GN_features(dataset, outfile)
- data = {}
- Dir.glob(File.join(Rbbt.datadir,'biocreative','BC2GN',dataset,'*.txt')).each{|f|
- code = File.basename(f).sub(/.txt/,'')
- data[code] = {}
- data[code][:text] = Open.read(f)
- }
- Open.read(File.join(Rbbt.datadir,'biocreative','BC2GN',dataset,'genelist')).each_line{|l|
- code, gene, mention = l.chomp.split(/\t/)
- data[code][:mentions] ||= []
- data[code][:mentions] << mention
- }
-
- fout = File.open(outfile,'w')
- parser = NERFeatures.new
-
- Progress.monitor("CRFPP Features BC2GN #{ dataset }")
- data.each{|code, info|
- text = info[:text]
- mentions = info[:mentions]
- next if mentions.nil?
-
- features = parser.tagged_features(text,mentions)
-
- features.each{|feat|
- fout.puts feat.join(" ")
- }
- fout.puts
- }
- fout.close
- end
-
- def org_features(org, outfile)
- names = Organism.lexicon(org).collect{|code, names|
- names
- }.flatten
-
- fout = File.open(outfile,'w')
- parser = NERFeatures.new
-
- Progress.monitor("CRFPP Features #{ org }")
- names.each{|name|
- features = parser.text_features(name, true)
- features.each{|feat|
- fout.puts feat.join(" ")
- }
- fout.puts
- }
- fout.close
-
-
- end
-
- file "data/BC2GM_train.features" do |t|
- BC2GM_features(:train, 'data/BC2GM_train.features')
- end
-
- file "data/BC2GM_test.features" do |t|
- BC2GM_features(:test, 'data/BC2GM_test.features')
- end
- file "data/BC2GN_Train.features" do |t|
- BC2GN_features('Train', 'data/BC2GN_Train.features')
- end
-
- file "data/BC2GN_Test.features" do |t|
- BC2GN_features('Test', 'data/BC2GN_Test.features')
- end
-
-
- file "data/BC2GM.features" => ['data/BC2GM_train.features','data/BC2GM_test.features'] do |t|
- Open.write('data/BC2GM.features',Open.read('data/BC2GM_train.features'))
- Open.append('data/BC2GM.features',Open.read('data/BC2GM_test.features'))
- end
-
- file "data/BC2GN.features" => ['data/BC2GN_Train.features','data/BC2GN_Test.features'] do |t|
- Open.write('data/BC2GN.features',Open.read('data/BC2GN_Train.features'))
- Open.append('data/BC2GN.features',Open.read('data/BC2GN_Test.features'))
- end
-
-
- file "data/BC2.features" => ['data/BC2GN.features','data/BC2GM.features'] do |t|
- Open.write('data/BC2.features',Open.read('data/BC2GM.features'))
- Open.append('data/BC2.features',Open.read('data/BC2GN.features'))
- end
-
- file "data/train.features" => [
- #'data/BC2GN.features',
- 'data/BC2GM_train.features'
- ] do |t|
- t.prerequisites.each_with_index{|f,i|
- if i == 0
- Open.write('data/train.features',Open.read(f))
- else
- Open.append('data/train.features',Open.read(f))
- end
- }
- end
-
- rule (/data\/(.*).features/) => ['data/BC2.features'] do |t|
- org = File.basename(t.name).sub(/.features$/,'')
- org_features(org, t.name)
- Open.append(t.name, Open.read('data/BC2.features'))
- end
-
-
-
- #{{{ MODEL
- rule (/model\/(.*)/) => lambda {|t| t.sub(/model/,'data') + '.features'} do |t|
- parser = NERFeatures.new
- parser.train( t.name.sub(/model/,'data') + '.features', t.name)
- end
-
- task 'clean' do
- FileUtils.rm Dir.glob("data/*")
- FileUtils.rm Dir.glob("model/*")
- FileUtils.rm Dir.glob("results/*")
-
- end
-
- task 'all' do
- Organism.all.each{|org|
- Rake::Task["model/#{ org }"].invoke
- }
- end
-
- task 'default' do
- if $org
- FileUtils.rm Dir.glob("**/#{$org}.*") if $force
- Rake::Task["model/#{$org}"].invoke
- else
- Rake::Task['clean'].invoke if $force
- Rake::Task['all'].invoke
- end
- end
-
- #{{{ EVALUATE
-
-
- def find(model, type, outfile)
- ner = Organism.ner(:human,type,:model => model)
-
- data = Biocreative.BC2GM(:test)
-
- fout = File.open(outfile,'w')
-
- Progress.monitor("Test")
- data.each{|code,info|
- text = info[:text]
- mentions = ner.extract(text)
-
- mentions.each{|mention|
- positions = Biocreative.position(text,mention)
- positions.each{|pos|
- fout.puts "#{code}|#{pos[0]} #{pos[1]}|#{mention}"
- }
- }
- }
-
- end
-
-
-
- rule (/results\/test$/) do |t|
- org = File.basename(t.name)
-
- if $type == 'rner'
- Rake::Task['model/train'].invoke
- end
- find('model/train',$type,t.name)
- end
-
- rule (/results\/test.eval$/) => ['results/test'] do |t|
- Biocreative.BC2GM_eval('results/test',:test, 'results/test.eval')
- end