RubyGems - rbbt - Versions diffs - 1.2.5 → 2.0.0 - Mend

rbbt 1.2.5 → 2.0.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (88)

checksums.yaml +7 -0
data/README.rdoc +2 -138
metadata +69 -214
data/LICENSE +0 -20
data/bin/rbbt_config +0 -245
data/install_scripts/classifier/R/classify.R +0 -36
data/install_scripts/classifier/Rakefile +0 -140
data/install_scripts/get_abner.sh +0 -2
data/install_scripts/get_banner.sh +0 -25
data/install_scripts/get_biocreative.sh +0 -72
data/install_scripts/get_crf++.sh +0 -26
data/install_scripts/get_entrez.sh +0 -4
data/install_scripts/get_go.sh +0 -4
data/install_scripts/get_polysearch.sh +0 -8
data/install_scripts/ner/Rakefile +0 -206
data/install_scripts/ner/config/default.rb +0 -52
data/install_scripts/norm/Rakefile +0 -219
data/install_scripts/norm/config/cue_default.rb +0 -10
data/install_scripts/norm/config/tokens_default.rb +0 -86
data/install_scripts/norm/functions.sh +0 -23
data/install_scripts/organisms/Ath.Rakefile +0 -55
data/install_scripts/organisms/Cal.Rakefile +0 -84
data/install_scripts/organisms/Cel.Rakefile +0 -109
data/install_scripts/organisms/Hsa.Rakefile +0 -140
data/install_scripts/organisms/Mmu.Rakefile +0 -77
data/install_scripts/organisms/Rakefile +0 -43
data/install_scripts/organisms/Rno.Rakefile +0 -88
data/install_scripts/organisms/Sce.Rakefile +0 -66
data/install_scripts/organisms/Spo.Rakefile +0 -40
data/install_scripts/organisms/rake-include.rb +0 -252
data/install_scripts/wordlists/consonants +0 -897
data/install_scripts/wordlists/stopwords +0 -1
data/lib/rbbt.rb +0 -83
data/lib/rbbt/bow/bow.rb +0 -88
data/lib/rbbt/bow/classifier.rb +0 -116
data/lib/rbbt/bow/dictionary.rb +0 -187
data/lib/rbbt/ner/abner.rb +0 -34
data/lib/rbbt/ner/banner.rb +0 -73
data/lib/rbbt/ner/dictionaryNER.rb +0 -98
data/lib/rbbt/ner/regexpNER.rb +0 -70
data/lib/rbbt/ner/rner.rb +0 -227
data/lib/rbbt/ner/rnorm.rb +0 -143
data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
data/lib/rbbt/ner/rnorm/tokens.rb +0 -217
data/lib/rbbt/sources/biocreative.rb +0 -75
data/lib/rbbt/sources/biomart.rb +0 -105
data/lib/rbbt/sources/entrez.rb +0 -211
data/lib/rbbt/sources/go.rb +0 -85
data/lib/rbbt/sources/gscholar.rb +0 -74
data/lib/rbbt/sources/organism.rb +0 -241
data/lib/rbbt/sources/polysearch.rb +0 -117
data/lib/rbbt/sources/pubmed.rb +0 -248
data/lib/rbbt/util/arrayHash.rb +0 -266
data/lib/rbbt/util/filecache.rb +0 -72
data/lib/rbbt/util/index.rb +0 -47
data/lib/rbbt/util/misc.rb +0 -106
data/lib/rbbt/util/open.rb +0 -251
data/lib/rbbt/util/rake.rb +0 -183
data/lib/rbbt/util/simpleDSL.rb +0 -87
data/lib/rbbt/util/tmpfile.rb +0 -35
data/tasks/install.rake +0 -124
data/test/rbbt/bow/test_bow.rb +0 -33
data/test/rbbt/bow/test_classifier.rb +0 -72
data/test/rbbt/bow/test_dictionary.rb +0 -91
data/test/rbbt/ner/rnorm/test_cue_index.rb +0 -57
data/test/rbbt/ner/rnorm/test_tokens.rb +0 -70
data/test/rbbt/ner/test_abner.rb +0 -17
data/test/rbbt/ner/test_banner.rb +0 -17
data/test/rbbt/ner/test_dictionaryNER.rb +0 -122
data/test/rbbt/ner/test_regexpNER.rb +0 -33
data/test/rbbt/ner/test_rner.rb +0 -126
data/test/rbbt/ner/test_rnorm.rb +0 -47
data/test/rbbt/sources/test_biocreative.rb +0 -38
data/test/rbbt/sources/test_biomart.rb +0 -31
data/test/rbbt/sources/test_entrez.rb +0 -49
data/test/rbbt/sources/test_go.rb +0 -24
data/test/rbbt/sources/test_organism.rb +0 -59
data/test/rbbt/sources/test_polysearch.rb +0 -27
data/test/rbbt/sources/test_pubmed.rb +0 -39
data/test/rbbt/util/test_arrayHash.rb +0 -257
data/test/rbbt/util/test_filecache.rb +0 -37
data/test/rbbt/util/test_index.rb +0 -31
data/test/rbbt/util/test_misc.rb +0 -20
data/test/rbbt/util/test_open.rb +0 -110
data/test/rbbt/util/test_simpleDSL.rb +0 -57
data/test/rbbt/util/test_tmpfile.rb +0 -21
data/test/test_helper.rb +0 -4
data/test/test_rbbt.rb +0 -11

data/test/rbbt/ner/test_abner.rb DELETED

@@ -1,17 +0,0 @@
-require File.dirname(__FILE__) + '/../../test_helper'
-require 'rbbt/ner/abner'
-require 'test/unit'
-class TestAbner < Test::Unit::TestCase
-  def test_extract
-      ner = Abner.new
-      mentions = ner.extract(" The P-ITIM-compelled multi-phosphoprotein complex binds to and activates SHP-2, which in turn dephosphorylates SHIP and Shc and probably other substrates.")
-      ["SHP-2", "SHIP", "Shc"].each{|mention|
-        assert(mentions.include? mention)
-      }
-  end
-end

data/test/rbbt/ner/test_banner.rb DELETED

@@ -1,17 +0,0 @@
-require File.dirname(__FILE__) + '/../../test_helper'
-require 'rbbt/ner/banner'
-require 'test/unit'
-class TestBanner < Test::Unit::TestCase
-  def test_extract
-      ner = Banner.new
-      mentions = ner.extract(" The P-ITIM-compelled multi-phosphoprotein complex binds to and activates SHP-2, which in turn dephosphorylates SHIP and Shc and probably other substrates.")
-      ["SHP - 2", "SHIP", "Shc"].each{|mention|
-        assert(mentions.include? mention)
-      }
-  end
-end

data/test/rbbt/ner/test_dictionaryNER.rb DELETED

@@ -1,122 +0,0 @@
-require 'rbbt'
-require 'rbbt/util/tmpfile'
-require 'rbbt/util/open'
-require 'rbbt/ner/dictionaryNER'
-require 'test/unit'
-class TestDictionaryNER < Test::Unit::TestCase
-  def setup
-    @dictionary  =<<-EOT
-DICT1\tWord1 Word2\tWord1
-DICT2\tWord3-Word4\tWord4
-    EOT
-    @dict = {
-      "word1" => [{'word2' => ['DICT1'] }, 'DICT1'],
-      "word3" => [{'word4' => ['DICT2'] }],
-      "word4" => ['DICT2'],
-    }
-  end
-  def test_simplify
-    assert_equal('word1', DictionaryNER.simplify( "Word1"))
-    assert_equal('ACL', DictionaryNER.simplify("ACL"))
-  end
-  def test_chunk
-    assert_equal(["Word1","Word2"], DictionaryNER.chunk('Word1-Word2'))
-    assert_equal(["Word1-1"], DictionaryNER.chunk('Word1-1'))
-  end
-  def test_match
-    [
-      ["Word1", {"word1" => ["D1"]}, {"Word1" => ["D1"]}],
-      ["Word1 Word1", {"word1" => ["D1"]}, {"Word1" => ["D1"]}],
-      ["Word2 Word1 Word3", {"word1" => ["D1"]}, {"Word1" => ["D1"]} ],
-      ["Word2 Word1 Word4", {"word1" => ["D1","D2"]}, {"Word1" => ["D1","D2"]} ],
-      ["Word2 Word1 Word4",
-        {"word1" => [{'word2' => ['D1']}]},
-        {} ],
-      [
-        "Word2 Word1 Word4",
-        {"word1" => [ {'word4' => ['D1']} ] },
-        {"Word1 Word4" => ["D1"]},
-      ],
-      [
-        "Word2 Word1 Word4",
-        {"word1" => [ {'word4' => ['D1']} ], "word4" => ['D2'] },
-        {"Word1 Word4" => ["D1"], "Word4" => ['D2']},
-      ],
-    ].each{|match_info|
-      text   = match_info[0]
-      dict   = match_info[1]
-      result = match_info[2]
-      assert_equal(result, DictionaryNER.match(dict, text))
-    }
-  end
-  def test_add_name
-    [
-      ["Word1", {"word1" => ['code']}],
-      ["Word1 Word2", {"word1" => [{"word2" => ['code']}]}],
-      ["Cerebellar stroke syndrome", {"cerebellar" => [{'stroke' => [{'syndrome' => ['code']}]}]}]
-    ].each{|info|
-      name = info[0]
-      result = info[1]
-      dict = {}
-      DictionaryNER.add_name(dict, name, 'code')
-      assert_equal(result, dict)
-    }
-  end
-  def test_load
-    assert_equal(@dict, DictionaryNER.load(@dictionary))
-  end
-  def test_class
-    ner = DictionaryNER.new(@dictionary)
-    [
-      [ "Word1 Word2", ["Word1 Word2", "Word1"] ],
-      [ "foo Word1 Word2 foo", ["Word1 Word2", "Word1"] ],
-      [ "Word1-Word2", ["Word1 Word2", "Word1"] ],
-      [ "Word1\nWord2", ["Word1 Word2", "Word1"] ],
-    ].each{|info|
-      text = info[0]
-      keys = info[1]
-      assert_equal(keys.sort, ner.match(text).keys.sort)
-    }
-  end
-  def test_load_from_file
-    tmpfile = TmpFile.tmp_file
-    Open.write(tmpfile, @dictionary)
-    ner = DictionaryNER.new(tmpfile)
-    assert(ner.match("Word1").any?)
-  end
-end

data/test/rbbt/ner/test_regexpNER.rb DELETED

@@ -1,33 +0,0 @@
-require File.dirname(__FILE__) + '/../../test_helper'
-require 'rbbt'
-require 'rbbt/util/tmpfile'
-require 'rbbt/ner/regexpNER'
-require 'test/unit'
-class TestRegExpNER < Test::Unit::TestCase
-  def test_class
-    text = "a bc d e f g h i j k  l m n o p q one two"
-    lexicon =<<-EOF
-C1,a,x,xx,xxx
-C2,bc,y,yy,yyy
-C3,i,z,zz,zzz,m,one two
-    EOF
-    file = TmpFile.tmp_file
-    File.open(file, 'w'){|f| f.write lexicon}
-    r = RegExpNER.new(file, :sep => ',', :stopwords => false)
-    assert_equal(['a', 'bc', 'i', 'm','one two'].sort,r.match_hash(text).values.flatten.sort)
-    r = RegExpNER.new(file, :sep => ',', :stopwords => true)
-    assert_equal(['bc', 'm','one two'].sort,r.match_hash(text).values.flatten.sort)
-    FileUtils.rm file
-  end
-end

data/test/rbbt/ner/test_rner.rb DELETED

@@ -1,126 +0,0 @@
-require File.dirname(__FILE__) + '/../../test_helper'
-require 'rbbt'
-require 'rbbt/ner/rner'
-require 'test/unit'
-class TestRNer < Test::Unit::TestCase
-  def setup
-    @parser = NERFeatures.new do
-      isLetters     /^[A-Z]+$/i
-      context prefix_3      /^(...)/
-      downcase do |w| w.downcase end
-      context %w(downcase)
-    end
-  end
-  def test_config
-    config = <<-EOC
-  isLetters(/^[A-Z]+$/i)
-  context(prefix_3(/^(...)/))
-  downcase { |w| w.downcase }
-  context(["downcase"])
-    EOC
-    assert(@parser.config == config)
-  end
-  def test_reverse
-    assert_equal("protein P53", NERFeatures.reverse("P53 protein"))
-    assert_equal(
-       ". LH of assay - radioimmuno serum the with compared was LH urinary for ) GONAVIS - HI ( test hemagglutination direct new A",
-     NERFeatures.reverse(
-       "A new direct hemagglutination test (HI-GONAVIS) for urinary LH was compared with the serum\n radioimmuno-assay of LH."
-      ))
-  end
-  def test_features
-    assert(@parser.features("abCdE"),["abCdE",true,'abc','abcde'])
-  end
-  def test_template
-    template =<<-EOT
-UisLetters: %x[0,1]
-Uprefix_3: %x[0,2]
-Uprefix_3#1: %x[1,2]
-Uprefix_3#-1: %x[-1,2]
-Udowncase: %x[0,3]
-Udowncase#1: %x[1,3]
-Udowncase#-1: %x[-1,3]
-B
-    EOT
-    assert(@parser.template == template)
-  end
-  def test_tokens
-    assert( NERFeatures.tokens("A new direct hemagglutination test (HI-GONAVIS) for urinary LH was compared with the serum\n radioimmuno-assay of LH.")==
-           ["A", "new", "direct", "hemagglutination", "test", "(", "HI", "-", "GONAVIS", ")", "for", "urinary", "LH", "was", "compared", "with", "the", "serum", "radioimmuno", "-", "assay", "of", "LH", "."])
-  end
-  def test_text_features
-    assert(@parser.text_features("abCdE 1234") == [["abCdE",true, "abC", "abcde"], ["1234",false, "123", "1234"]])
-    assert(@parser.text_features("abCdE 1234",true) == [["abCdE",true, "abC", "abcde",1], ["1234",false, "123", "1234",2]])
-    assert(@parser.text_features("abCdE 1234",false) == [["abCdE",true, "abC", "abcde",0], ["1234",false, "123", "1234",0]])
-  end
-  def test_tagged_features
-    assert_equal(
-      [["phosphorilation",true, "pho", "phosphorilation", 0],
-        ["of",true, false, "of", 0],
-        ["GENE1",false, "GEN", "gene1", 1],
-        [".", false, false, ".", 0]],
-      @parser.tagged_features("phosphorilation of GENE1.",['GENE1']))
-      assert_equal(
-        [["GENE1",false, "GEN", "gene1", 1],
-          ["phosphorilation",true, "pho", "phosphorilation", 0]],
-      @parser.tagged_features("GENE1 phosphorilation",['GENE1']))
-    assert_equal(
-           [["phosphorilation",true, "pho", "phosphorilation", 0],
-            ["of",true, false, "of", 0],
-            ["GENE",true, "GEN", "gene", 1],
-            ["1",false, false, "1", 2],
-            [".", false, false, ".", 0]],
-      @parser.tagged_features("phosphorilation of GENE 1.",['GENE 1']))
-  end
-  def test_tagged_features_reverse
-    @parser.reverse = true
-    assert_equal(
-      [
-        ["GENE1",false, "GEN", "gene1", 1],
-        ["of",true, false, "of", 0],
-        ["phosphorilation",true, "pho", "phosphorilation", 0]
-    ],
-    @parser.tagged_features("phosphorilation of GENE1",['GENE1']))
-    assert_equal(
-          [
-            [".", false, false, ".", 0],
-            ["1",false, false, "1", 1],
-            ["GENE",true, "GEN", "gene", 2],
-            ["of",true, false, "of", 0],
-            ["phosphorilation",true, "pho", "phosphorilation", 0]
-        ],
-    @parser.tagged_features("phosphorilation of GENE 1.",['GENE 1']))
-  end
-  def test_NER_default
-    parser = NERFeatures.new
-    assert(parser.template =~ /UisLetter/)
-  end
-  def test_CRFPP_install
-    assert(require File.join(Rbbt.datadir, 'third_party/crf++/ruby/CRFPP'))
-  end
-end

data/test/rbbt/ner/test_rnorm.rb DELETED

@@ -1,47 +0,0 @@
-require File.dirname(__FILE__) + '/../../test_helper'
-require 'rbbt/ner/rnorm'
-require 'rbbt/util/open'
-require 'rbbt/util/tmpfile'
-require 'test/unit'
-class TestRNORM < Test::Unit::TestCase
-  def setup
-    tmp = TmpFile.tmp_file("test-rnorm-")
-    lexicon =<<-EOT
-S000000029	YAL031C	GIP4	FUN21
-S000000030	YAL032C	PRP45	FUN20
-S000000031	YAL033W	POP5	FUN53
-S000000374	YBR170C	NPL4	HRD4
-S000000375	GENE1	BBB	CCC
-S000000376	AAA	GENE1	DDD
-	EOT
-    Open.write(tmp, lexicon)
-    @norm = Normalizer.new(tmp)
-    FileUtils.rm tmp
-  end
-  def test_match
-     assert_equal(["S000000029"], @norm.match("FUN21"))
-     assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN"))
-     assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 2"))
-     assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 21"))
-     assert_equal([], @norm.match("GER4"))
-     @norm.match("FUN21")
-  end
-  def test_select
-    assert_equal(["S000000029"], @norm.select(["S000000030", "S000000029", "S000000031"],"FUN 21"))
-  end
-  def test_resolve
-    assert_equal(["S000000029"], @norm.resolve("FUN 21"))
-  end
-  def test_order
-    assert_equal(["S000000375"], @norm.resolve("GENE1"))
-  end
-end

data/test/rbbt/sources/test_biocreative.rb DELETED

@@ -1,38 +0,0 @@
-require File.dirname(__FILE__) + '/../../test_helper'
-require 'rbbt/sources/biocreative'
-require 'test/unit'
-class TestBiocreative < Test::Unit::TestCase
-  def test_BC2GM
-    assert(Biocreative.BC2GM(:test)['BC2GM000008491'][:text] == "Phenotypic analysis demonstrates that trio and Abl cooperate in regulating axon outgrowth in the embryonic central nervous system (CNS).")
-    assert(Biocreative.BC2GM(:test)['BC2GM000008491'][:mentions] == ["trio", "Abl"] )
-  end
-  def test_position
-    mention   = "IgA"
-    text      = "Early complement components, C1q and C4, and IgA secretory piece were absent."
-    pos       = [[38, 40]]
-    assert(Biocreative.position(text,mention) == pos)
-    mention   = "tyrosine-specific phosphatase"
-    text      = "When expressed in Escherichia coli, SH-PTP2 displays tyrosine-specific phosphatase activity."
-    pos       = [[46, 73]]
-    assert(Biocreative.position(text,mention) == pos)
-    mention   = "tyrosine - specific phosphatase"
-    text      = "When expressed in Escherichia coli, SH-PTP2 displays tyrosine-specific phosphatase activity."
-    pos       = [[46, 73]]
-    assert(Biocreative.position(text,mention) == pos)
-    mention   = "LH"
-    text      = "A new direct hemagglutination test (HI-GONAVIS) for urinary LH was compared with the serum radioimmuno-assay of LH."
-    pos       = [[52, 53],[96, 97]]
-    assert(Biocreative.position(text,mention) == pos)
-  end
-end

data/test/rbbt/sources/test_biomart.rb DELETED

@@ -1,31 +0,0 @@
-require File.dirname(__FILE__) + '/../../test_helper'
-require 'rbbt/sources/biomart'
-require 'test/unit'
-class TestBioMart < Test::Unit::TestCase
-  def test_get
-    assert_raise BioMart::QueryError do
-      BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],['with_unknownattr'])
-    end
-    data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['protein_id'],[])
-    assert(data['856452']['protein_id'].include? 'AAB68382')
-    data = BioMart.get('scerevisiae_gene_ensembl','entrezgene', ['external_gene_id'],[], data )
-    assert(data['856452']['protein_id'].include? 'AAB68382')
-    assert(data['856452']['external_gene_id'].include? 'CUP1-2')
-  end
-  def test_query
-    data = BioMart.query('scerevisiae_gene_ensembl','entrezgene', ['protein_id','refseq_peptide','external_gene_id','ensembl_gene_id'],[])
-    assert(data['856452']['protein_id'].include? 'AAB68382')
-    assert(data['856452']['external_gene_id'].include? 'CUP1-2')
- end
-end

data/test/rbbt/sources/test_entrez.rb DELETED

@@ -1,49 +0,0 @@
-require File.dirname(__FILE__) + '/../../test_helper'
-require 'rbbt/sources/entrez'
-require 'test/unit'
-class TestEntrez < Test::Unit::TestCase
-  def test_entrez2native
-    tax   = 4932
-    fix   = proc{|code| code.sub(/SGD:S0/,'S0') }
-    check = proc{|code| code.match(/^S0/)}
-    lexicon = Entrez.entrez2native(tax, 5, fix, check)
-    assert(lexicon['855611'].include? 'S000005056')
-  end
-  def test_entrez2pubmed
-    tax = 4932
-    data = Entrez.entrez2pubmed(tax)
-    assert(data['850320'].include? '15102838')
-  end
-  def test_getonline
-    geneids = 9129
-    assert_match(/PRP3 pre-mRNA processing factor/s, Entrez.get_online(geneids))
-    geneids = [9129,9]
-    assert_match(/PRP3 pre-mRNA processing factor/s, Entrez.get_online(geneids)[9129])
-  end
-  def test_getgene
-    geneids = 9129
-    assert_equal([["PRP3 pre-mRNA processing factor 3 homolog (S. cerevisiae)"]], Entrez.get_gene(geneids).description)
-    geneids = [9129,728049]
-    assert_equal([["PRP3 pre-mRNA processing factor 3 homolog (S. cerevisiae)"]], Entrez.get_gene(geneids)[9129].description)
-  end
-  def test_similarity
-    assert(Entrez.gene_text_similarity(9129, "PRP3 pre-mRNA processing factor 3 homolog (S. cerevisiae)") > 0)
-    assert_equal(0, Entrez.gene_text_similarity("NON EXISTEN GENEID", "PRP3 pre-mRNA processing factor 3 homolog (S. cerevisiae)"))
-  end
-end