RubyGems - rbbt - Versions diffs - 1.2.5 → 2.0.0 - Mend

rbbt 1.2.5 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88)

checksums.yaml +7 -0
data/README.rdoc +2 -138
metadata +69 -214
data/LICENSE +0 -20
data/bin/rbbt_config +0 -245
data/install_scripts/classifier/R/classify.R +0 -36
data/install_scripts/classifier/Rakefile +0 -140
data/install_scripts/get_abner.sh +0 -2
data/install_scripts/get_banner.sh +0 -25
data/install_scripts/get_biocreative.sh +0 -72
data/install_scripts/get_crf++.sh +0 -26
data/install_scripts/get_entrez.sh +0 -4
data/install_scripts/get_go.sh +0 -4
data/install_scripts/get_polysearch.sh +0 -8
data/install_scripts/ner/Rakefile +0 -206
data/install_scripts/ner/config/default.rb +0 -52
data/install_scripts/norm/Rakefile +0 -219
data/install_scripts/norm/config/cue_default.rb +0 -10
data/install_scripts/norm/config/tokens_default.rb +0 -86
data/install_scripts/norm/functions.sh +0 -23
data/install_scripts/organisms/Ath.Rakefile +0 -55
data/install_scripts/organisms/Cal.Rakefile +0 -84
data/install_scripts/organisms/Cel.Rakefile +0 -109
data/install_scripts/organisms/Hsa.Rakefile +0 -140
data/install_scripts/organisms/Mmu.Rakefile +0 -77
data/install_scripts/organisms/Rakefile +0 -43
data/install_scripts/organisms/Rno.Rakefile +0 -88
data/install_scripts/organisms/Sce.Rakefile +0 -66
data/install_scripts/organisms/Spo.Rakefile +0 -40
data/install_scripts/organisms/rake-include.rb +0 -252
data/install_scripts/wordlists/consonants +0 -897
data/install_scripts/wordlists/stopwords +0 -1
data/lib/rbbt.rb +0 -83
data/lib/rbbt/bow/bow.rb +0 -88
data/lib/rbbt/bow/classifier.rb +0 -116
data/lib/rbbt/bow/dictionary.rb +0 -187
data/lib/rbbt/ner/abner.rb +0 -34
data/lib/rbbt/ner/banner.rb +0 -73
data/lib/rbbt/ner/dictionaryNER.rb +0 -98
data/lib/rbbt/ner/regexpNER.rb +0 -70
data/lib/rbbt/ner/rner.rb +0 -227
data/lib/rbbt/ner/rnorm.rb +0 -143
data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
data/lib/rbbt/ner/rnorm/tokens.rb +0 -217
data/lib/rbbt/sources/biocreative.rb +0 -75
data/lib/rbbt/sources/biomart.rb +0 -105
data/lib/rbbt/sources/entrez.rb +0 -211
data/lib/rbbt/sources/go.rb +0 -85
data/lib/rbbt/sources/gscholar.rb +0 -74
data/lib/rbbt/sources/organism.rb +0 -241
data/lib/rbbt/sources/polysearch.rb +0 -117
data/lib/rbbt/sources/pubmed.rb +0 -248
data/lib/rbbt/util/arrayHash.rb +0 -266
data/lib/rbbt/util/filecache.rb +0 -72
data/lib/rbbt/util/index.rb +0 -47
data/lib/rbbt/util/misc.rb +0 -106
data/lib/rbbt/util/open.rb +0 -251
data/lib/rbbt/util/rake.rb +0 -183
data/lib/rbbt/util/simpleDSL.rb +0 -87
data/lib/rbbt/util/tmpfile.rb +0 -35
data/tasks/install.rake +0 -124
data/test/rbbt/bow/test_bow.rb +0 -33
data/test/rbbt/bow/test_classifier.rb +0 -72
data/test/rbbt/bow/test_dictionary.rb +0 -91
data/test/rbbt/ner/rnorm/test_cue_index.rb +0 -57
data/test/rbbt/ner/rnorm/test_tokens.rb +0 -70
data/test/rbbt/ner/test_abner.rb +0 -17
data/test/rbbt/ner/test_banner.rb +0 -17
data/test/rbbt/ner/test_dictionaryNER.rb +0 -122
data/test/rbbt/ner/test_regexpNER.rb +0 -33
data/test/rbbt/ner/test_rner.rb +0 -126
data/test/rbbt/ner/test_rnorm.rb +0 -47
data/test/rbbt/sources/test_biocreative.rb +0 -38
data/test/rbbt/sources/test_biomart.rb +0 -31
data/test/rbbt/sources/test_entrez.rb +0 -49
data/test/rbbt/sources/test_go.rb +0 -24
data/test/rbbt/sources/test_organism.rb +0 -59
data/test/rbbt/sources/test_polysearch.rb +0 -27
data/test/rbbt/sources/test_pubmed.rb +0 -39
data/test/rbbt/util/test_arrayHash.rb +0 -257
data/test/rbbt/util/test_filecache.rb +0 -37
data/test/rbbt/util/test_index.rb +0 -31
data/test/rbbt/util/test_misc.rb +0 -20
data/test/rbbt/util/test_open.rb +0 -110
data/test/rbbt/util/test_simpleDSL.rb +0 -57
data/test/rbbt/util/test_tmpfile.rb +0 -21
data/test/test_helper.rb +0 -4
data/test/test_rbbt.rb +0 -11

data/install_scripts/get_biocreative.sh DELETED

@@ -1,72 +0,0 @@
-#!/bin/bash
-mkdir src
-cd src
-wget "http://garr.dl.sourceforge.net/sourceforge/biocreative/bc2GNandGMgold_Subs.tar.gz"
-wget "http://switch.dl.sourceforge.net/sourceforge/biocreative/biocreative1task1a.tar.gz"
-wget "http://kent.dl.sourceforge.net/sourceforge/biocreative/biocreative1task1b.tar.gz"
-wget "http://mesh.dl.sourceforge.net/sourceforge/biocreative/biocreative1task2.tar.gz"
-wget "http://garr.dl.sourceforge.net/sourceforge/biocreative/bc2geneMention.tar.gz"
-wget "http://switch.dl.sourceforge.net/sourceforge/biocreative/bc2normal.1.4.tar.gz"
-wget "http://kent.dl.sourceforge.net/sourceforge/biocreative/bc2GNtest.zip"
-for f in *.gz; do tar xfz $f; done
-unzip bc2GNtest.zip
-cd ..
-mkdir BC2GM
-cp -R src/bc2geneMention/train/ BC2GM/
-cp -R src/sourceforgeDistrib-22-Sept-07/genemention/BC2GM/test/ BC2GM/
-mv BC2GM/train/alt_eval.perl BC2GM/
-mkdir BC2GN
-cp -R src/biocreative2normalization/* BC2GN/
-mv BC2GN/noisyTrainingData/ BC2GN/NoisyTrain
-mv BC2GN/trainingData/ BC2GN/Train
-cp -R src/bc2GNtest/bc2GNtestdocs/ BC2GN/Test
-mv BC2GN/NoisyTrain/noisytrain.genelist BC2GN/NoisyTrain/genelist
-mv BC2GN/Train/training.genelist BC2GN/Train/genelist
-cp src/sourceforgeDistrib-22-Sept-07/genenormalization/bc2test.genelist BC2GN/Test/genelist
-mkdir BC1GN
-cp -R src/biocreative1/bc1task1b/* BC1GN/
-mv BC1GN/fly/FlyDevTest/ BC1GN/fly/devtest
-mv BC1GN/fly/FlyEvaluation/ BC1GN/fly/test
-mv BC1GN/fly/FlyNoisyTraining/ BC1GN/fly/train
-mv BC1GN/fly/*.list  BC1GN/fly/synonyms.list
-mv BC1GN/fly/test/*gene_list  BC1GN/fly/test/genelist
-for f in BC1GN/fly/train/gene_list/*; do cat "$f" >> BC1GN/fly/train/genelist;done
-for f in BC1GN/fly/devtest/gene_lists/*; do cat "$f" >> BC1GN/fly/devtest/genelist;done
-mv BC1GN/mouse/MouseDevTest/ BC1GN/mouse/devtest
-mv BC1GN/mouse/MouseEvaluation/ BC1GN/mouse/test
-mv BC1GN/mouse/MouseNoisyTraining/ BC1GN/mouse/train
-mv BC1GN/mouse/*.list  BC1GN/mouse/synonyms.list
-mv BC1GN/mouse/test/*gene_list  BC1GN/mouse/test/genelist
-for f in BC1GN/mouse/train/gene_list/*; do cat "$f" >> BC1GN/mouse/train/genelist;done
-for f in BC1GN/mouse/devtest/gene_lists/*; do cat "$f" >> BC1GN/mouse/devtest/genelist;done
-mv BC1GN/yeast/YeastDevTest/ BC1GN/yeast/devtest
-mv BC1GN/yeast/YeastEvaluation/ BC1GN/yeast/test
-mv BC1GN/yeast/YeastNoisyTraining/ BC1GN/yeast/train
-mv BC1GN/yeast/*.list  BC1GN/yeast/synonyms.list
-mv BC1GN/yeast/test/*gene_list  BC1GN/yeast/test/genelist
-for f in BC1GN/yeast/train/gene_list/*; do cat "$f" >> BC1GN/yeast/train/genelist;done
-for f in BC1GN/yeast/devtest/gene_lists/*; do cat "$f" >> BC1GN/yeast/devtest/genelist;done
-# Fix a bug in the perl script! :-|
-cat BC1GN/task1Bscorer.pl |grep -v 'else {EVALFILE = STDIN;}' >foo; mv foo BC1GN/task1Bscorer.pl
-rm -Rf src

data/install_scripts/get_crf++.sh DELETED

@@ -1,26 +0,0 @@
-wget "http://downloads.sourceforge.net/crfpp/CRF%2B%2B-0.51.tar.gz?modtime=1215793886&big_mirror=0" -O crf++.tar.gz
-tar xvfz crf++.tar.gz
-rm crf++.tar.gz
-cd CRF*
-PREFIX=$(dirname $PWD)
-if [ `uname -m` == 'x86_64' ]; then
-  WITH_PIC='--with-pic';
-else
-  WITH_PIC=''
-fi
-./configure  --prefix=$PREFIX --exec-prefix=$PREFIX $WITH_PIC;
-make install
-cd ruby
-ruby extconf.rb  --with-opt-lib=$PREFIX/lib/ --with-opt-include=$PREFIX/include/
-make
-cc -shared -o CRFPP.so CRFPP_wrap.o ../../lib/libcrfpp.a  -L. -L/usr/lib  -L.  -rdynamic -Wl,-export-dynamic    -lruby -lpthread  -lpthread -ldl -lcrypt -lm   -lc -lstdc++
-mkdir ../../ruby/
-cp CRFPP.so ../../ruby/
-cd ../../
-rm -Rf CRF* include

data/install_scripts/get_entrez.sh DELETED

@@ -1,4 +0,0 @@
-#!/bin/bash
-wget ftp://ftp.ncbi.nih.gov/gene/DATA/gene_info.gz; gunzip gene_info.gz
-wget ftp://ftp.ncbi.nih.gov/gene/DATA/gene2pubmed.gz; gunzip gene2pubmed.gz

data/install_scripts/get_go.sh DELETED

@@ -1,4 +0,0 @@
-#!/bin/bash
-wget ftp://ftp.geneontology.org/pub/go/ontology/gene_ontology.obo
-wget http://www.geneontology.org/GO_slims/goslim_generic.obo

data/install_scripts/get_polysearch.sh DELETED

@@ -1,8 +0,0 @@
-#!/bin/bash
-wget http://wishart.biology.ualberta.ca/polysearch/include/disease_IDlist.txt -O disease.txt
-wget http://wishart.biology.ualberta.ca/polysearch/include/organ_ID.txt -O organ.txt
-wget http://wishart.biology.ualberta.ca/polysearch/include/tissue_ID.txt -O tissue.txt
-wget http://wishart.biology.ualberta.ca/polysearch/include/subcellular_localization_ID.txt -O subcellular.txt
-wget http://wishart.biology.ualberta.ca/polysearch/include/drugnames.txt -O drug.txt
-wget http://wishart.biology.ualberta.ca/polysearch/include/HMDBnames.txt -O metabolite.txt

data/install_scripts/ner/Rakefile DELETED

@@ -1,206 +0,0 @@
-require 'rbbt/sources/organism'
-require 'rbbt/sources/biocreative'
-require 'rbbt/ner/rner'
-require 'progress-monitor'
-$type = ENV['type'] || 'rner'
-#{{{ FEATURES
-def BC2GM_features(dataset, outfile)
-  data = Biocreative.BC2GM(dataset)
-  fout = File.open(outfile,'w')
-  parser   = NERFeatures.new
-  Progress.monitor("CRFPP Features BC2GM #{ dataset }")
-  data.each{|code, info|
-    text = info[:text]
-    mentions = info[:mentions]
-    features = parser.tagged_features(text,mentions)
-    features.each{|feat|
-      fout.puts feat.join(" ")
-    }
-    fout.puts
-  }
-  fout.close
-end
-def BC2GN_features(dataset, outfile)
-  data = {}
-  Dir.glob(File.join(Rbbt.datadir,'biocreative','BC2GN',dataset,'*.txt')).each{|f|
-    code = File.basename(f).sub(/.txt/,'')
-    data[code] = {}
-    data[code][:text] = Open.read(f)
-  }
-  Open.read(File.join(Rbbt.datadir,'biocreative','BC2GN',dataset,'genelist')).each_line{|l|
-   code, gene, mention = l.chomp.split(/\t/)
-   data[code][:mentions] ||= []
-   data[code][:mentions] << mention
-  }
-  fout = File.open(outfile,'w')
-  parser   = NERFeatures.new
-  Progress.monitor("CRFPP Features BC2GN #{ dataset }")
-  data.each{|code, info|
-    text = info[:text]
-    mentions = info[:mentions]
-    next if mentions.nil?
-    features = parser.tagged_features(text,mentions)
-    features.each{|feat|
-      fout.puts feat.join(" ")
-    }
-    fout.puts
-  }
-  fout.close
-end
-def org_features(org, outfile)
-  names = Organism.lexicon(org).collect{|code, names|
-    names
-  }.flatten
-  fout = File.open(outfile,'w')
-  parser   = NERFeatures.new
-  Progress.monitor("CRFPP Features #{ org }")
-  names.each{|name|
-    features = parser.text_features(name, true)
-    features.each{|feat|
-      fout.puts feat.join(" ")
-    }
-    fout.puts
-  }
-  fout.close
-end
-file "data/BC2GM_train.features" do |t|
-  BC2GM_features(:train, 'data/BC2GM_train.features')
-end
-file "data/BC2GM_test.features" do |t|
-  BC2GM_features(:test, 'data/BC2GM_test.features')
-end
-file "data/BC2GN_Train.features" do |t|
-  BC2GN_features('Train', 'data/BC2GN_Train.features')
-end
-file "data/BC2GN_Test.features" do |t|
-  BC2GN_features('Test', 'data/BC2GN_Test.features')
-end
-file "data/BC2GM.features" => ['data/BC2GM_train.features','data/BC2GM_test.features'] do |t|
-  Open.write('data/BC2GM.features',Open.read('data/BC2GM_train.features'))
-  Open.append('data/BC2GM.features',Open.read('data/BC2GM_test.features'))
-end
-file "data/BC2GN.features" => ['data/BC2GN_Train.features','data/BC2GN_Test.features'] do |t|
-  Open.write('data/BC2GN.features',Open.read('data/BC2GN_Train.features'))
-  Open.append('data/BC2GN.features',Open.read('data/BC2GN_Test.features'))
-end
-file "data/BC2.features" => ['data/BC2GN.features','data/BC2GM.features'] do |t|
-  Open.write('data/BC2.features',Open.read('data/BC2GM.features'))
-  Open.append('data/BC2.features',Open.read('data/BC2GN.features'))
-end
-file "data/train.features" => [
-  #'data/BC2GN.features',
-  'data/BC2GM_train.features'
-  ] do |t|
-  t.prerequisites.each_with_index{|f,i|
-    if i == 0
-      Open.write('data/train.features',Open.read(f))
-    else
-      Open.append('data/train.features',Open.read(f))
-    end
-  }
-end
-rule (/data\/(.*).features/) =>  ['data/BC2.features'] do |t|
-  org = File.basename(t.name).sub(/.features$/,'')
-  org_features(org, t.name)
-  Open.append(t.name, Open.read('data/BC2.features'))
-end
-#{{{ MODEL
-rule (/model\/(.*)/) => lambda {|t| t.sub(/model/,'data') + '.features'} do |t|
-  parser = NERFeatures.new
-  parser.train( t.name.sub(/model/,'data') + '.features', t.name)
-end
-task 'clean' do
-  FileUtils.rm Dir.glob("data/*")
-  FileUtils.rm Dir.glob("model/*")
-  FileUtils.rm Dir.glob("results/*")
-end
-task 'all' do
-  Organism.all.each{|org|
-    Rake::Task["model/#{ org }"].invoke
-  }
-end
-task 'default' do
-  if $org
-    FileUtils.rm Dir.glob("**/#{$org}.*") if $force
-    Rake::Task["model/#{$org}"].invoke
-  else
-    Rake::Task['clean'].invoke if $force
-    Rake::Task['all'].invoke
-  end
-end
-#{{{ EVALUATE
-def find(model, type, outfile)
-  ner = Organism.ner(:human,type,:model => model)
-  data = Biocreative.BC2GM(:test)
-  fout = File.open(outfile,'w')
-  Progress.monitor("Test")
-  data.each{|code,info|
-    text = info[:text]
-    mentions = ner.extract(text)
-    mentions.each{|mention|
-      positions = Biocreative.position(text,mention)
-      positions.each{|pos|
-        fout.puts "#{code}|#{pos[0]} #{pos[1]}|#{mention}"
-      }
-    }
-  }
-end
-rule (/results\/test$/)  do |t|
-  org = File.basename(t.name)
-  if $type == 'rner'
-    Rake::Task['model/train'].invoke
-  end
-  find('model/train',$type,t.name)
-end
-rule (/results\/test.eval$/) => ['results/test'] do |t|
-  Biocreative.BC2GM_eval('results/test',:test, 'results/test.eval')
-end

data/install_scripts/ner/config/default.rb DELETED

@@ -1,52 +0,0 @@
-isLetters     /^[A-Z]+$/i
-isUpper       /^[A-Z]+$/
-isLower       /^[a-z]+$/
-isDigits      /^[0-9]+$/i
-isRoman       /^[IVX]+$/
-isGreek       /^(?:alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)$/i
-isPunctuation /^[,.;]$/
-isDelim       /^[\/()\[\]{}\-]$/
-isNonWord     /^[^\w]+$/
-isConjunction /^and|or|&|,$/
-hasLetters    /[A-Z]/i
-hasUpper      /.[A-Z]/
-hasLower      /[a-z]/
-hasDigits     /[0-9]/i
-hasGreek      /(?:alpha|beta|gamma|delta|epsilon|zeta|eta|theta|iota|kappa|lambda|mu|nu|xi|omicron|pi|rho|sigma|tau|upsilon|phi|chi|psi|omega)/i
-hasPunctuation /[,.;]/
-hasDelim      /[\/()\[\]{}\-]/
-hasNonWord    /[^\w]/
-caspMix       /[a-z].[A-Z]/
-keywords      /(?:protein|gene|domain|ase)s?$/
-hasSuffix     /[a-z][A-Z0-9]$/
-numLetters    do |w| w.scan(/[A-Z]/i).length end
-numDigits     do |w| w.scan(/[0-9]/).length end
-#
-prefix_3      /^(...)/
-prefix_4      /^(....)/
-suffix_3      /(...)$/
-suffix_4      /(....)$/
-token1        do |w|
-                 w.sub(/[A-Z]/,'A').
-                   sub(/[a-z]/,'a').
-                   sub(/[0-9]/,'0').
-                   sub(/[^0-9a-z]/i,'x')
-              end
-token2        do  |w|
-                 w.sub(/[A-Z]+/,'A').
-                   sub(/[a-z]+/,'a').
-                   sub(/[0-9]+/,'0').
-                   sub(/[^0-9a-z]+/i,'x')
-               end
-token3         do |w| w.downcase end
-special        do |w| w.is_special? end
-context   %w(special token2 isPunctuation isDelim)
-window     %w(1 2 3 -1 -2 -3)
-#direction :reverse

data/install_scripts/norm/Rakefile DELETED

@@ -1,219 +0,0 @@
-require 'rbbt'
-require 'rbbt/sources/organism'
-require 'rbbt/util/open'
-require 'rbbt/ner/rner'
-require 'rbbt/ner/rnorm'
-require 'progress-monitor'
-$type = ENV['ner'] || :rner
-$debug = !ENV['debug'].nil?
-$perfect = !ENV['perfect'].nil?
-$docs  = ENV['docs']
-$org2rbbt = {
-  'yeast' => 'Sce',
-  'mouse' => 'Mmu',
-  'fly' => 'Sce',
-  'bc2gn' => 'Hsa',
-}
-def match(org, filedir, goldstandard,outfile)
-  t = Time.now
-  if org == 'bc2gn'
-    custom_file = File.join('config', org + '.config')
-    norm = Normalizer.new(File.join(Rbbt.datadir,"biocreative/BC2GN/entrezGeneLexicon.list"),
-                        :to_entrez => false,
-                        :file => (File.exist?(custom_file) ? custom_file : nil),
-                        :max_candidates => 200)
-  else
-    custom_file = File.join('config', org + '.config')
-    norm = Normalizer.new(File.join(Rbbt.datadir,"biocreative/BC1GN/#{org}/synonyms.list"),
-                        :to_entrez => Open.to_hash(File.join(Rbbt.datadir,"organisms/#{$org2rbbt[org]}/identifiers"),
-                                     :native => 0, :extra => 1,:single => true, :sep => "\t|\\|",
-                                     :fix => proc{|l| l.sub(/S000/,'S0')}),
-                        :file => (File.exist?(custom_file) ? custom_file : nil),
-                        :max_candidates => 200)
-  end
-  STDERR.puts "Loaded Normalizer #{Time.now - t}\n\n"
-  if $type.to_s == 'rner'
-    ner = NER.new('models/' + org)
-  else
-    ner = Organism.ner($org2rbbt[org], $type)
-  end
-  fout=File.open(outfile,'w')
-  gs  = Open.to_hash(goldstandard,:native => 0,:extra => 1)
-  gs_mentions  = Open.to_hash(goldstandard,:native => 0,:extra => 2)
-  if org == 'bc2gn'
-    lex = Open.to_hash( File.join(Rbbt.datadir,"biocreative/BC2GN/entrezGeneLexicon.list"), :sep => "\t|\\|")
-  else
-    lex = Open.to_hash( File.join(Rbbt.datadir,"biocreative/BC1GN/#{org}/synonyms.list"), :sep => "\t|\\|")
-  end
-  if $docs
-    files = $docs.split(',').collect{|doc| File.join(filedir, doc + '.txt')}
-  else
-    files = Dir.glob(filedir + '*.txt').sort
-  end
-  Progress.monitor("Processing Files")
-  files.each{|f|
-    fid = File.basename(f).sub(/.txt/,'')
-    text = Open.read(f)
-    if $perfect
-      mentions = (gs_mentions[fid] || []).flatten
-    else
-      mentions = ner.extract(text).uniq
-    end
-    if $debug
-      puts "------------------------------------"
-      puts "FILE #{fid}"
-      puts
-      puts text
-      puts "CODES: #{(gs[fid] || []).flatten.join(", ")}"
-      puts "MENTIONS: #{mentions.join(", ")}"
-    end
-    found = []
-    mentions.each{|mention|
-      codes = norm.select(norm.match(mention),mention,text)
-      found += codes
-      codes.each{|code|
-        #code = code.sub(/S000/,'S0')
-        fout.puts "#{ fid }\t#{ code}\t#{mention}"
-      }
-      puts "Mention: #{ mention } => #{ codes.join(", ") }"  if $debug
-    }
-    if $debug
-      found.uniq!
-      fn = (gs[fid] || []).flatten.uniq - found
-      fp = found - (gs[fid] || []).flatten.uniq
-      fn.each{|code|
-        if lex[code]
-          puts "FN: #{ code } => #{lex[code].flatten.join(", ")}"
-        else
-           puts "FN: #{ code }"
-        end
-     }
-      fp.each{|code|
-        if lex[code]
-          puts "FP: #{ code } => #{lex[code].flatten.join(", ")}"
-        else
-           puts "FN: #{ code }"
-        end
-      }
-    end
-  }
-  fout.close
-end
-rule (/models\/(yeast|mouse|fly|bc2gn).features/) do |t|
-  org = File.basename(t.name).sub(/\.features/,'')
-  if org == 'bc2gn'
-    lexicon = File.join(Rbbt.datadir, "biocreative/BC2GN/entrezGeneLexicon.list")
-  else
-    lexicon = File.join(Rbbt.datadir, "biocreative/BC1GN/#{ org }/synonyms.list")
-  end
-  names = File.open(lexicon).collect{|l|
-    names = l.split(/\t/)
-    names.shift
-    names.compact.select{|n| !n.empty?}
-  }.flatten
-  fout = File.open(t.name,'w')
-  parser   = NERFeatures.new
-  Progress.monitor("CRFPP Features #{ org }")
-  names.each{|name|
-    features = parser.text_features(name, true)
-    features.each{|feat|
-      fout.puts feat.join(" ")
-    }
-    fout.puts
-  }
-  fout.close
-  if org != 'bc2gn'
-    Open.append(t.name, Open.read('../ner/data/BC2.features'))
-  else
-    Open.append(t.name, Open.read('../ner/data/BC2GM.features'))
-    Open.append(t.name, Open.read('../ner/data/BC2GN_Train.features'))
-  end
-end
-rule (/models\/(yeast|mouse|fly|bc2gn)$/) => lambda{|t| t + '.features' } do |t|
-  org = File.basename(t.name)
-  parser = NERFeatures.new
-  parser.train( t.name + '.features', t.name)
-end
-rule (/results\/(yeast|mouse|fly)_(devtest|train|test)$/) do |t|
-  org, dataset = File.basename(t.name).split(/_/)
-  if $type.to_sym == :rner
-    Rake::Task['models/' + org].invoke
-  end
-  filedir      = File.join(Rbbt.datadir, "biocreative/BC1GN/#{ org }/#{ dataset }/text/")
-  goldstandard = File.join(Rbbt.datadir, "biocreative/BC1GN/#{ org }/#{ dataset }/genelist")
-  match(org,filedir, goldstandard,t.name)
-end
-rule (/results\/(.+)_(.+).eval/) => lambda{|t| t.sub(/.eval/,'')} do |t|
-  org, dataset = File.basename(t.name.sub(/.eval/,'')).split(/_/)
-  cmd = "perl #{File.join(Rbbt.datadir, "biocreative/BC1GN/task1Bscorer.pl")} #{File.join(Rbbt.datadir, "biocreative/BC1GN/#{ org }/#{ dataset }/genelist")} #{t.name.sub(/.eval/,'')} > #{t.name}"
-  puts cmd
-  system cmd
-end
-rule (/results\/bc2gn$/) do |t|
-  org = 'bc2gn'
-  if $type.to_sym == :rner
-    Rake::Task['models/' + org].invoke
-  end
-  filedir      = File.join(Rbbt.datadir, "biocreative/BC2GN/Test/")
-  goldstandard = File.join(Rbbt.datadir, "biocreative/BC2GN/Test/genelist")
-  match(org,filedir, goldstandard,t.name)
-end
-rule (/results\/bc2gn.eval/) => lambda{|t| t.sub(/.eval/,'')} do |t|
-  cmd = "python #{Rbbt.datadir + '/biocreative/BC2GN/bc2scoring.py'} #{Rbbt.datadir + '/biocreative/BC2GN/Test/genelist'} results/bc2gn > #{t.name}"
-  system cmd
-end