rbbt 1.1.7 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. checksums.yaml +7 -0
  2. data/README.rdoc +2 -138
  3. metadata +72 -136
  4. data/LICENSE +0 -20
  5. data/bin/rbbt_config +0 -246
  6. data/install_scripts/classifier/R/classify.R +0 -36
  7. data/install_scripts/classifier/Rakefile +0 -145
  8. data/install_scripts/get_abner.sh +0 -2
  9. data/install_scripts/get_banner.sh +0 -25
  10. data/install_scripts/get_biocreative.sh +0 -72
  11. data/install_scripts/get_crf++.sh +0 -26
  12. data/install_scripts/get_entrez.sh +0 -4
  13. data/install_scripts/get_go.sh +0 -4
  14. data/install_scripts/get_polysearch.sh +0 -8
  15. data/install_scripts/ner/Rakefile +0 -206
  16. data/install_scripts/ner/config/default.rb +0 -52
  17. data/install_scripts/norm/Rakefile +0 -219
  18. data/install_scripts/norm/config/cue_default.rb +0 -10
  19. data/install_scripts/norm/config/tokens_default.rb +0 -79
  20. data/install_scripts/norm/functions.sh +0 -23
  21. data/install_scripts/organisms/Rakefile +0 -43
  22. data/install_scripts/organisms/cgd.Rakefile +0 -84
  23. data/install_scripts/organisms/human.Rakefile +0 -145
  24. data/install_scripts/organisms/mgi.Rakefile +0 -77
  25. data/install_scripts/organisms/pombe.Rakefile +0 -40
  26. data/install_scripts/organisms/rake-include.rb +0 -258
  27. data/install_scripts/organisms/rgd.Rakefile +0 -88
  28. data/install_scripts/organisms/sgd.Rakefile +0 -66
  29. data/install_scripts/organisms/tair.Rakefile +0 -54
  30. data/install_scripts/organisms/worm.Rakefile +0 -109
  31. data/install_scripts/wordlists/consonants +0 -897
  32. data/install_scripts/wordlists/stopwords +0 -1
  33. data/lib/rbbt.rb +0 -86
  34. data/lib/rbbt/bow/bow.rb +0 -88
  35. data/lib/rbbt/bow/classifier.rb +0 -116
  36. data/lib/rbbt/bow/dictionary.rb +0 -187
  37. data/lib/rbbt/ner/abner.rb +0 -34
  38. data/lib/rbbt/ner/banner.rb +0 -73
  39. data/lib/rbbt/ner/dictionaryNER.rb +0 -98
  40. data/lib/rbbt/ner/regexpNER.rb +0 -70
  41. data/lib/rbbt/ner/rner.rb +0 -227
  42. data/lib/rbbt/ner/rnorm.rb +0 -143
  43. data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
  44. data/lib/rbbt/ner/rnorm/tokens.rb +0 -213
  45. data/lib/rbbt/sources/biocreative.rb +0 -75
  46. data/lib/rbbt/sources/biomart.rb +0 -105
  47. data/lib/rbbt/sources/entrez.rb +0 -211
  48. data/lib/rbbt/sources/go.rb +0 -40
  49. data/lib/rbbt/sources/organism.rb +0 -245
  50. data/lib/rbbt/sources/polysearch.rb +0 -117
  51. data/lib/rbbt/sources/pubmed.rb +0 -111
  52. data/lib/rbbt/util/arrayHash.rb +0 -255
  53. data/lib/rbbt/util/filecache.rb +0 -72
  54. data/lib/rbbt/util/index.rb +0 -47
  55. data/lib/rbbt/util/misc.rb +0 -106
  56. data/lib/rbbt/util/open.rb +0 -235
  57. data/lib/rbbt/util/rake.rb +0 -183
  58. data/lib/rbbt/util/simpleDSL.rb +0 -87
  59. data/lib/rbbt/util/tmpfile.rb +0 -19
  60. data/tasks/install.rake +0 -124
@@ -1,36 +0,0 @@
1
- library('e1071')
2
-
3
- BOW.norm <- function(x, weights = NULL){
4
- x = 1 + log(x);
5
- x[x==-Inf] = 0;
6
- x.sum = as.matrix(x) %*% matrix(1,nrow=dim(x)[2],ncol=1);
7
- x.sum = matrix(100/x.sum,nrow=length(x.sum),ncol=dim(x)[2]);
8
- x.norm = x * x.sum;
9
- rm(x.sum);
10
- x.norm[is.na(x.norm)] = 0
11
-
12
- if (!is.null(weights)){
13
- x.norm = x.norm * matrix(abs(weights),ncol=length(weights),nrow=dim(x.norm)[1],byrow=T)
14
- }
15
-
16
- x.norm;
17
- }
18
-
19
-
20
- BOW.classification.model <- function(features, modelfile, dictfile = NULL){
21
- feats = read.table(features, sep="\t", header=T, row.names=1);
22
-
23
- if (!is.null(dictfile)){
24
- svm.weights = read.table(file=dictfile, sep="\t")[2];
25
- }else {
26
- svm.weights = NULL;
27
- }
28
- feats[-1] = BOW.norm(feats[-1], svm.weights);
29
- svm.model = svm(Class ~ ., data=feats, svm.weights);
30
- save(svm.model,svm.weights, file=modelfile);
31
- }
32
-
33
- BOW.classification.classify <- function(modelfile, x, weights = NULL){
34
- x = BOW.norm(x, weights);
35
- predict(modelfile, x);
36
- }
@@ -1,145 +0,0 @@
1
- require 'rbbt'
2
- require 'rbbt/sources/organism'
3
- require 'rbbt/sources/pubmed'
4
- require 'rbbt/bow/bow'
5
- require 'rbbt/bow/dictionary'
6
- require 'rbbt/bow/classifier'
7
- require 'rbbt/util/misc'
8
-
9
- require 'progress-monitor'
10
- require 'rand'
11
-
12
- $hi = ENV['hi'] || 0.8
13
- $low = ENV['low'] || 0.01
14
- $max = ENV['max'] || 3000
15
- $bigrams = ENV['bigrams'] == 'true' || false
16
-
17
- $ndocs = ENV['ndocs'] || 5000
18
-
19
- desc "Bilds Dictionary and Features for an organism"
20
- rule(/data\/(.*)/) do |t|
21
- org = File.basename(t.name)
22
-
23
- go = Organism.gene_literature_go(org).collect{|gene, pmids| pmids}.flatten.uniq
24
- all = Organism.literature(org).flatten.uniq - go
25
-
26
- ndocs = [go.length, all.length, $ndocs.to_i].min
27
- puts "Using #{ ndocs } from each class\n\n"
28
-
29
- go = go.shuffle[0..ndocs - 1]
30
- all = all.shuffle[0..ndocs - 1]
31
-
32
- dict = Dictionary::KL.new
33
-
34
-
35
-
36
- chunks = all.chunk(50)
37
- Progress.monitor("Building Dictionary for #{ org }: -",1000)
38
- chunks.each{|chunk|
39
- PubMed.get_article(chunk).each{|pmid, article|
40
- words = BagOfWords.terms(article.text,$bigrams)
41
- dict.add(words, :-)
42
- }
43
- }
44
-
45
- chunks = go.chunk(50)
46
- Progress.monitor("Building Dictionary for #{ org }: +",1000)
47
- chunks.each{|chunk|
48
- PubMed.get_article(chunk).each{|pmid, article|
49
- words = BagOfWords.terms(article.text,$bigrams)
50
- dict.add(words, :+)
51
- }
52
- }
53
-
54
- term_weigths = dict.weights(:low => $low.to_f, :hi => $hi.to_f, :limit => $max.to_i)
55
- Open.write(t.name + '.dict', term_weigths.sort.collect{|p| p.join("\t")}.join("\n"))
56
-
57
- terms = term_weigths.keys.sort
58
-
59
- fout = File.open(t.name, 'w')
60
- fout.puts((['Name','Class'] + terms).join("\t"))
61
-
62
- Progress.monitor("Building Features for #{ org }", 1000)
63
- all.each{|pmid|
64
- text = PubMed.get_article(pmid).text
65
- fout.puts(([pmid, :-] + BagOfWords.features(text, terms)).join("\t"))
66
- }
67
- go.each{|pmid|
68
- text = PubMed.get_article(pmid).text
69
- fout.puts(([pmid, :+] + BagOfWords.features(text, terms)).join("\t"))
70
- }
71
-
72
-
73
- fout.close
74
- end
75
-
76
- rule (/model\/(.*)/) => lambda{|n| n.sub(/model/,'data')} do |t|
77
- features = t.name.sub(/model/,'data')
78
- Classifier.create_model(features, t.name, features + '.dict')
79
- end
80
-
81
- rule (/results\/(.*)/) => lambda{|n| n.sub(/results/,'model')} do |t|
82
- model = t.name.sub(/results/,'model')
83
- features = t.name.sub(/results/,'data')
84
- org = File.basename(t.name)
85
-
86
- ndocs = 100
87
-
88
- used = []
89
- if "".respond_to? :collect
90
- used = Open.read(features).collect{|l| l.chomp.split(/\t/).first}[1..-1]
91
- else
92
- used = Open.read(features).lines.collect{|l| l.chomp.split(/\t/).first}[1..-1]
93
- end
94
-
95
- classifier = Classifier.new(model)
96
- go = Organism.gene_literature_go(org).collect{|gene, pmids| pmids}.flatten.uniq - used
97
- all = Organism.literature(org).flatten.uniq - go - used
98
-
99
- go = go.shuffle[0..ndocs - 1]
100
- all = all.shuffle[0..ndocs - 1]
101
-
102
- ndocs = go.length + all.length
103
-
104
- raise "Not enogh unused articles to evaluate" if go.empty? || all.empty?
105
-
106
- features_go = PubMed.get_article(go).collect{|pmid, article|
107
- article = article.text
108
- }
109
- pos = classifier.classify(features_go).select{|v| v == '+'}.length
110
-
111
- features_all = PubMed.get_article(all).collect{|pmid, article|
112
- article = article.text
113
- }
114
- neg = classifier.classify(features_all).select{|v| v == '-'}.length
115
-
116
- puts "#{ pos } #{ neg }"
117
-
118
- precision = (pos + neg) / (ndocs).to_f
119
- recall = pos / go.length.to_f
120
- f1 = ( 2 * precision * recall) / (precision + recall ).to_f
121
-
122
- puts "Precision: #{ precision}, Recall: #{ recall }, F1: #{f1}"
123
- end
124
-
125
- task 'clean' do
126
- FileUtils.rm Dir.glob("data/*")
127
- FileUtils.rm Dir.glob("model/*")
128
- FileUtils.rm Dir.glob("results/*")
129
-
130
- end
131
- task 'all' do
132
- Organism.all.each{|org|
133
- Rake::Task["model/#{ org }"].invoke
134
- }
135
- end
136
- task 'update' do
137
- if $org
138
- FileUtils.rm Dir.glob("**/#{$org}.*") if $force
139
- Rake::Task["model/#{$org}"].invoke
140
- else
141
- Rake::Task['clean'].invoke if $force
142
- Rake::Task['all'].invoke
143
- end
144
- end
145
-
@@ -1,2 +0,0 @@
1
- #!/bin/bash
2
- wget http://pages.cs.wisc.edu/~bsettles/abner/abner.jar
@@ -1,25 +0,0 @@
1
- #!/bin/bash
2
-
3
- wget "http://downloads.sourceforge.net/banner/BANNER_v02.zip?modtime=1196955449&big_mirror=0"
4
- wget "http://downloads.sourceforge.net/banner/gene_model_v02.bin?modtime=1196955509&big_mirror=0"
5
- mv BANNER_v02.zip BANNER.zip
6
- mv gene_model_v02.bin gene_model.bin
7
- unzip BANNER.zip
8
- cd BANNER
9
- libs=`find libs/ -name "*.jar"`
10
- mkdir classes
11
- javac -classpath `echo $libs|sed s/\ /:/g` -d classes `find src/ -name "*.java"`
12
- cd classes
13
- for f in ../libs/*.jar; do jar xf "$f";done
14
- jar cf banner.jar *
15
- mv banner.jar ../..
16
- cd ..
17
- cp -R nlpdata/ ../
18
- cd ..
19
- rm BANNER.zip
20
- rm -Rf BANNER
21
-
22
-
23
-
24
-
25
-
@@ -1,72 +0,0 @@
1
- #!/bin/bash
2
-
3
- mkdir src
4
- cd src
5
- wget "http://garr.dl.sourceforge.net/sourceforge/biocreative/bc2GNandGMgold_Subs.tar.gz"
6
- wget "http://switch.dl.sourceforge.net/sourceforge/biocreative/biocreative1task1a.tar.gz"
7
- wget "http://kent.dl.sourceforge.net/sourceforge/biocreative/biocreative1task1b.tar.gz"
8
- wget "http://mesh.dl.sourceforge.net/sourceforge/biocreative/biocreative1task2.tar.gz"
9
- wget "http://garr.dl.sourceforge.net/sourceforge/biocreative/bc2geneMention.tar.gz"
10
- wget "http://switch.dl.sourceforge.net/sourceforge/biocreative/bc2normal.1.4.tar.gz"
11
- wget "http://kent.dl.sourceforge.net/sourceforge/biocreative/bc2GNtest.zip"
12
-
13
- for f in *.gz; do tar xfz $f; done
14
- unzip bc2GNtest.zip
15
-
16
- cd ..
17
-
18
- mkdir BC2GM
19
- cp -R src/bc2geneMention/train/ BC2GM/
20
- cp -R src/sourceforgeDistrib-22-Sept-07/genemention/BC2GM/test/ BC2GM/
21
- mv BC2GM/train/alt_eval.perl BC2GM/
22
-
23
- mkdir BC2GN
24
- cp -R src/biocreative2normalization/* BC2GN/
25
- mv BC2GN/noisyTrainingData/ BC2GN/NoisyTrain
26
- mv BC2GN/trainingData/ BC2GN/Train
27
- cp -R src/bc2GNtest/bc2GNtestdocs/ BC2GN/Test
28
- mv BC2GN/NoisyTrain/noisytrain.genelist BC2GN/NoisyTrain/genelist
29
- mv BC2GN/Train/training.genelist BC2GN/Train/genelist
30
- cp src/sourceforgeDistrib-22-Sept-07/genenormalization/bc2test.genelist BC2GN/Test/genelist
31
-
32
- mkdir BC1GN
33
- cp -R src/biocreative1/bc1task1b/* BC1GN/
34
- mv BC1GN/fly/FlyDevTest/ BC1GN/fly/devtest
35
- mv BC1GN/fly/FlyEvaluation/ BC1GN/fly/test
36
- mv BC1GN/fly/FlyNoisyTraining/ BC1GN/fly/train
37
- mv BC1GN/fly/*.list BC1GN/fly/synonyms.list
38
- mv BC1GN/fly/test/*gene_list BC1GN/fly/test/genelist
39
- for f in BC1GN/fly/train/gene_list/*; do cat "$f" >> BC1GN/fly/train/genelist;done
40
- for f in BC1GN/fly/devtest/gene_lists/*; do cat "$f" >> BC1GN/fly/devtest/genelist;done
41
- mv BC1GN/mouse/MouseDevTest/ BC1GN/mouse/devtest
42
- mv BC1GN/mouse/MouseEvaluation/ BC1GN/mouse/test
43
- mv BC1GN/mouse/MouseNoisyTraining/ BC1GN/mouse/train
44
- mv BC1GN/mouse/*.list BC1GN/mouse/synonyms.list
45
- mv BC1GN/mouse/test/*gene_list BC1GN/mouse/test/genelist
46
- for f in BC1GN/mouse/train/gene_list/*; do cat "$f" >> BC1GN/mouse/train/genelist;done
47
- for f in BC1GN/mouse/devtest/gene_lists/*; do cat "$f" >> BC1GN/mouse/devtest/genelist;done
48
- mv BC1GN/yeast/YeastDevTest/ BC1GN/yeast/devtest
49
- mv BC1GN/yeast/YeastEvaluation/ BC1GN/yeast/test
50
- mv BC1GN/yeast/YeastNoisyTraining/ BC1GN/yeast/train
51
- mv BC1GN/yeast/*.list BC1GN/yeast/synonyms.list
52
- mv BC1GN/yeast/test/*gene_list BC1GN/yeast/test/genelist
53
- for f in BC1GN/yeast/train/gene_list/*; do cat "$f" >> BC1GN/yeast/train/genelist;done
54
- for f in BC1GN/yeast/devtest/gene_lists/*; do cat "$f" >> BC1GN/yeast/devtest/genelist;done
55
- # Fix a bug in the perl script! :-|
56
- cat BC1GN/task1Bscorer.pl |grep -v 'else {EVALFILE = STDIN;}' >foo; mv foo BC1GN/task1Bscorer.pl
57
-
58
-
59
-
60
- rm -Rf src
61
-
62
-
63
-
64
-
65
-
66
-
67
-
68
-
69
-
70
-
71
-
72
-
@@ -1,26 +0,0 @@
1
- wget "http://downloads.sourceforge.net/crfpp/CRF%2B%2B-0.51.tar.gz?modtime=1215793886&big_mirror=0" -O crf++.tar.gz
2
- tar xvfz crf++.tar.gz
3
- rm crf++.tar.gz
4
- cd CRF*
5
- PREFIX=$(dirname $PWD)
6
-
7
- if [ `uname -m` == 'x86_64' ]; then
8
- WITH_PIC='--with-pic';
9
- else
10
- WITH_PIC=''
11
- fi
12
-
13
- ./configure --prefix=$PREFIX --exec-prefix=$PREFIX $WITH_PIC;
14
- make install
15
- cd ruby
16
-
17
- ruby extconf.rb --with-opt-lib=$PREFIX/lib/ --with-opt-include=$PREFIX/include/
18
- make
19
- cc -shared -o CRFPP.so CRFPP_wrap.o ../../lib/libcrfpp.a -L. -L/usr/lib -L. -rdynamic -Wl,-export-dynamic -lruby -lpthread -lpthread -ldl -lcrypt -lm -lc -lstdc++
20
-
21
- mkdir ../../ruby/
22
- cp CRFPP.so ../../ruby/
23
- cd ../../
24
- rm -Rf CRF* include
25
-
26
-
@@ -1,4 +0,0 @@
1
- #!/bin/bash
2
-
3
- wget ftp://ftp.ncbi.nih.gov/gene/DATA/gene_info.gz; gunzip gene_info.gz
4
- wget ftp://ftp.ncbi.nih.gov/gene/DATA/gene2pubmed.gz; gunzip gene2pubmed.gz
@@ -1,4 +0,0 @@
1
- #!/bin/bash
2
-
3
- wget ftp://ftp.geneontology.org/pub/go/ontology/gene_ontology.obo
4
- wget http://www.geneontology.org/GO_slims/goslim_generic.obo
@@ -1,8 +0,0 @@
1
- #!/bin/bash
2
-
3
- wget http://wishart.biology.ualberta.ca/polysearch/include/disease_IDlist.txt -O disease.txt
4
- wget http://wishart.biology.ualberta.ca/polysearch/include/organ_ID.txt -O organ.txt
5
- wget http://wishart.biology.ualberta.ca/polysearch/include/tissue_ID.txt -O tissue.txt
6
- wget http://wishart.biology.ualberta.ca/polysearch/include/subcellular_localization_ID.txt -O subcellular.txt
7
- wget http://wishart.biology.ualberta.ca/polysearch/include/drugnames.txt -O drug.txt
8
- wget http://wishart.biology.ualberta.ca/polysearch/include/HMDBnames.txt -O metabolite.txt
@@ -1,206 +0,0 @@
1
- require 'rbbt/sources/organism'
2
- require 'rbbt/sources/biocreative'
3
- require 'rbbt/ner/rner'
4
-
5
- require 'progress-monitor'
6
-
7
-
8
- $type = ENV['type'] || 'rner'
9
-
10
- #{{{ FEATURES
11
-
12
- def BC2GM_features(dataset, outfile)
13
- data = Biocreative.BC2GM(dataset)
14
-
15
- fout = File.open(outfile,'w')
16
- parser = NERFeatures.new
17
-
18
- Progress.monitor("CRFPP Features BC2GM #{ dataset }")
19
- data.each{|code, info|
20
- text = info[:text]
21
- mentions = info[:mentions]
22
-
23
- features = parser.tagged_features(text,mentions)
24
-
25
- features.each{|feat|
26
- fout.puts feat.join(" ")
27
- }
28
- fout.puts
29
- }
30
- fout.close
31
- end
32
-
33
- def BC2GN_features(dataset, outfile)
34
- data = {}
35
- Dir.glob(File.join(Rbbt.datadir,'biocreative','BC2GN',dataset,'*.txt')).each{|f|
36
- code = File.basename(f).sub(/.txt/,'')
37
- data[code] = {}
38
- data[code][:text] = Open.read(f)
39
- }
40
- Open.read(File.join(Rbbt.datadir,'biocreative','BC2GN',dataset,'genelist')).each_line{|l|
41
- code, gene, mention = l.chomp.split(/\t/)
42
- data[code][:mentions] ||= []
43
- data[code][:mentions] << mention
44
- }
45
-
46
- fout = File.open(outfile,'w')
47
- parser = NERFeatures.new
48
-
49
- Progress.monitor("CRFPP Features BC2GN #{ dataset }")
50
- data.each{|code, info|
51
- text = info[:text]
52
- mentions = info[:mentions]
53
- next if mentions.nil?
54
-
55
- features = parser.tagged_features(text,mentions)
56
-
57
- features.each{|feat|
58
- fout.puts feat.join(" ")
59
- }
60
- fout.puts
61
- }
62
- fout.close
63
- end
64
-
65
- def org_features(org, outfile)
66
- names = Organism.lexicon(org).collect{|code, names|
67
- names
68
- }.flatten
69
-
70
- fout = File.open(outfile,'w')
71
- parser = NERFeatures.new
72
-
73
- Progress.monitor("CRFPP Features #{ org }")
74
- names.each{|name|
75
- features = parser.text_features(name, true)
76
- features.each{|feat|
77
- fout.puts feat.join(" ")
78
- }
79
- fout.puts
80
- }
81
- fout.close
82
-
83
-
84
- end
85
-
86
- file "data/BC2GM_train.features" do |t|
87
- BC2GM_features(:train, 'data/BC2GM_train.features')
88
- end
89
-
90
- file "data/BC2GM_test.features" do |t|
91
- BC2GM_features(:test, 'data/BC2GM_test.features')
92
- end
93
- file "data/BC2GN_Train.features" do |t|
94
- BC2GN_features('Train', 'data/BC2GN_Train.features')
95
- end
96
-
97
- file "data/BC2GN_Test.features" do |t|
98
- BC2GN_features('Test', 'data/BC2GN_Test.features')
99
- end
100
-
101
-
102
- file "data/BC2GM.features" => ['data/BC2GM_train.features','data/BC2GM_test.features'] do |t|
103
- Open.write('data/BC2GM.features',Open.read('data/BC2GM_train.features'))
104
- Open.append('data/BC2GM.features',Open.read('data/BC2GM_test.features'))
105
- end
106
-
107
- file "data/BC2GN.features" => ['data/BC2GN_Train.features','data/BC2GN_Test.features'] do |t|
108
- Open.write('data/BC2GN.features',Open.read('data/BC2GN_Train.features'))
109
- Open.append('data/BC2GN.features',Open.read('data/BC2GN_Test.features'))
110
- end
111
-
112
-
113
- file "data/BC2.features" => ['data/BC2GN.features','data/BC2GM.features'] do |t|
114
- Open.write('data/BC2.features',Open.read('data/BC2GM.features'))
115
- Open.append('data/BC2.features',Open.read('data/BC2GN.features'))
116
- end
117
-
118
- file "data/train.features" => [
119
- #'data/BC2GN.features',
120
- 'data/BC2GM_train.features'
121
- ] do |t|
122
- t.prerequisites.each_with_index{|f,i|
123
- if i == 0
124
- Open.write('data/train.features',Open.read(f))
125
- else
126
- Open.append('data/train.features',Open.read(f))
127
- end
128
- }
129
- end
130
-
131
- rule (/data\/(.*).features/) => ['data/BC2.features'] do |t|
132
- org = File.basename(t.name).sub(/.features$/,'')
133
- org_features(org, t.name)
134
- Open.append(t.name, Open.read('data/BC2.features'))
135
- end
136
-
137
-
138
-
139
- #{{{ MODEL
140
- rule (/model\/(.*)/) => lambda {|t| t.sub(/model/,'data') + '.features'} do |t|
141
- parser = NERFeatures.new
142
- parser.train( t.name.sub(/model/,'data') + '.features', t.name)
143
- end
144
-
145
- task 'clean' do
146
- FileUtils.rm Dir.glob("data/*")
147
- FileUtils.rm Dir.glob("model/*")
148
- FileUtils.rm Dir.glob("results/*")
149
-
150
- end
151
-
152
- task 'all' do
153
- Organism.all.each{|org|
154
- Rake::Task["model/#{ org }"].invoke
155
- }
156
- end
157
-
158
- task 'default' do
159
- if $org
160
- FileUtils.rm Dir.glob("**/#{$org}.*") if $force
161
- Rake::Task["model/#{$org}"].invoke
162
- else
163
- Rake::Task['clean'].invoke if $force
164
- Rake::Task['all'].invoke
165
- end
166
- end
167
-
168
- #{{{ EVALUATE
169
-
170
-
171
- def find(model, type, outfile)
172
- ner = Organism.ner(:human,type,:model => model)
173
-
174
- data = Biocreative.BC2GM(:test)
175
-
176
- fout = File.open(outfile,'w')
177
-
178
- Progress.monitor("Test")
179
- data.each{|code,info|
180
- text = info[:text]
181
- mentions = ner.extract(text)
182
-
183
- mentions.each{|mention|
184
- positions = Biocreative.position(text,mention)
185
- positions.each{|pos|
186
- fout.puts "#{code}|#{pos[0]} #{pos[1]}|#{mention}"
187
- }
188
- }
189
- }
190
-
191
- end
192
-
193
-
194
-
195
- rule (/results\/test$/) do |t|
196
- org = File.basename(t.name)
197
-
198
- if $type == 'rner'
199
- Rake::Task['model/train'].invoke
200
- end
201
- find('model/train',$type,t.name)
202
- end
203
-
204
- rule (/results\/test.eval$/) => ['results/test'] do |t|
205
- Biocreative.BC2GM_eval('results/test',:test, 'results/test.eval')
206
- end