rbbt-text 0.6.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -101,7 +101,7 @@ module NLP
101
101
  a, b, d, c = $1, $2, $3, $4
102
102
  events << eventCount.to_s << "\t"
103
103
  events << returnFeatures(a, b, c)
104
- (" " + a + b + "__" + eventCount.to_s + "____" + d + "__" + c + " ")
104
+ (" " << a << b << "__" << eventCount.to_s << "____" << d << "__" << c << " ")
105
105
  }
106
106
  eventCount += 1
107
107
  end
@@ -111,6 +111,27 @@ module NLP
111
111
  [events, marks]
112
112
  end
113
113
 
114
# Marks sentence-boundary candidates in +text+ and collects their features.
#
# Every line of +text+ is scanned repeatedly for a token ending in one of
# . ! ? ) ] " that is followed by one or more spaces and another token,
# the whole pair being surrounded by single spaces. For each hit:
#
# * the running candidate number, a tab, and whatever
#   returnFeatures(a, b, c) returns are appended to the events string;
# * the hit is rewritten in the line as " <a><b>__<n>____<spaces>__<c> ",
#   so the punctuation is now followed by "__" instead of a space.
#
# Returns the pair [events, marks]. marks is the concatenation of the
# rewritten lines; the newlines consumed by the split are not restored.
def self.event_extraction(text)
  events = ""
  marks = ""
  event_count = 0

  boundary = / ([^ ]+)([.!\?\)\]\"])( +)([^ ]+) /

  text.split(/\n/).each do |line|
    # Keep matching the same line until no candidate is left; each rewrite
    # changes the line, so the match is recomputed from scratch.
    while (hit = boundary.match(line))
      a, b, d, c = hit.captures
      events << event_count.to_s << "\t"
      events << returnFeatures(a, b, c)
      line = "#{hit.pre_match} #{a}#{b}__#{event_count}____#{d}__#{c} #{hit.post_match}"
      event_count += 1
    end
    marks << line
  end

  [events, marks]
end
134
+
114
135
  def self.process_labels(marked_text, labels)
115
136
  out = ""
116
137
 
@@ -0,0 +1,74 @@
1
+ require 'rbbt'
2
+ require 'rjb'
3
+ require 'rbbt/ner/segment'
4
+ require 'rbbt/resource'
5
+
6
# Sentence splitting through the OpenNLP Java classes
# (opennlp.tools.sentdetect), loaded with Rjb.
module OpenNLP
  Rbbt.claim Rbbt.software.opt.OpenNLP, :install, Rbbt.share.install.software.OpenNLP.find

  # NOTE(review): the claimed file is named "da-sent.bin" while the URL
  # fetches "de-sent.bin" -- confirm which sentence model is intended.
  Rbbt.claim Rbbt.software.opt.OpenNLP.models["da-sent.bin"], :url, "http://opennlp.sourceforge.net/models-1.5/de-sent.bin"

  # Seconds the detection subprocess may run before it is killed.
  MAX = 5

  @@FileInputStream = Rjb::import('java.io.FileInputStream')
  @@SentenceModel = Rjb::import('opennlp.tools.sentdetect.SentenceModel')
  @@SentenceDetectorME = Rjb::import('opennlp.tools.sentdetect.SentenceDetectorME')

  # Returns the SentenceDetectorME instance, building it from the
  # "da-sent.bin" model file on first use and reusing it afterwards.
  def self.sentence_split_detector
    @@sentence_split_detector ||= begin
      model_in = @@FileInputStream.new(Rbbt.software.opt.OpenNLP.models["da-sent.bin"].produce.find)

      begin
        model = @@SentenceModel.new(model_in)
      ensure
        # Close the stream even when the model cannot be loaded.
        model_in.close
      end

      @@SentenceDetectorME.new(model)
    end
  end

  # Splits +text+ into sentences.
  #
  # Detection runs in a forked child that writes the sentences, joined by
  # "#OpenNLP:SENTENCE#", to a temporary file the parent reads back. Each
  # returned sentence is passed through Segment.setup together with the
  # index at which it was found in +text+.
  #
  # Returns [] for nil or empty +text+.
  # Raises RuntimeError when the child runs for more than MAX seconds.
  def self.sentence_splitter(text)
    return [] if text.nil? || text.empty?

    detector = sentence_split_detector
    separator = "#OpenNLP:SENTENCE#"

    sentences = nil
    TmpFile.with_file do |tmpfile|
      start_time = Time.now

      pid = Process.fork do
        sent = detector.sentDetect(text)
        Open.write(tmpfile, sent * separator)
      end

      begin
        # WNOHANG makes waitpid return nil while the child is still running;
        # a plain waitpid would block and the timeout could never trigger.
        until Process.waitpid(pid, Process::WNOHANG)
          if Time.now - start_time > MAX
            Process.kill(9, pid)
            # Reap the killed child in the background so it does not linger
            # as a zombie.
            Process.detach(pid)
            raise "Taking too long (> #{MAX} seconds)"
          end
          sleep 0.1
        end
      rescue Errno::ECHILD
        # The child has already been reaped; nothing left to wait for.
      end

      sentences = Open.read(tmpfile).split(separator)
    end

    # Search for each sentence starting from the last character of the
    # previous one, so repeated sentences get distinct positions.
    last = 0
    sentences.collect do |sentence|
      start = text.index(sentence, last)
      Segment.setup sentence, start
      last = start + sentence.length - 1
      sentence
    end
  end
end
@@ -0,0 +1,21 @@
1
#!/bin/bash
#
# Installs Linnaeus and its species-proxy entity pack, then rewrites the
# pack's properties.conf so that its $dir entry points at the installed
# location.
#
# install_src, get_src, opt_dir and build_dir, as well as OPT_DIR and
# OPT_JAR_DIR, are not defined in this script; they are expected to be
# provided by the environment that runs it.

name='Linnaeus'
url="http://sourceforge.net/projects/linnaeus/files/Linnaeus/linnaeus-2.0.tar.gz/download"
species_url="http://sourceforge.net/projects/linnaeus/files/Entity_packs/species-proxy-1.2.tar.gz/download"

install_src "$name" "$url"
ln -s "$OPT_DIR/$name/bin/"*.jar "$OPT_JAR_DIR/$name.jar"

get_src "Linnaeus-species-proxy" "$species_url"
pkg_dir="$(opt_dir "$name")"
build_dir="$(build_dir)"
mv "$build_dir" "$pkg_dir"

conf="$pkg_dir/species-proxy/properties.conf"

# Use a fresh temporary file: appending to a fixed /tmp path would carry
# lines over from earlier runs into the regenerated configuration.
tmp_file="$(mktemp "${TMPDIR:-/tmp}/species-proxy-properties.XXXXXX")"

# Keep everything except the existing "?dir =" line ...
grep -v "^.dir =" "$conf" > "$tmp_file"
# ... start the new file with the corrected directory ...
echo "\$dir = $pkg_dir/species-proxy/" > "$conf"
# ... and add back the remaining settings, dropping comment lines.
grep -v "^#" "$tmp_file" >> "$conf"

rm -f "$tmp_file"
21
+
@@ -0,0 +1,12 @@
1
#!/bin/bash
#
# Installs the Apache OpenNLP binary distribution and links its jars into
# the shared jar directory.
#
# get_src, move_opt and clean_build, as well as OPT_DIR and OPT_JAR_DIR,
# are not defined in this script; they are expected to be provided by the
# environment that runs it.

name='OpenNLP'
url="http://apache.rediris.es//incubator/opennlp/apache-opennlp-1.5.2-incubating-bin.tar.gz"

# Fetch the distribution and move it into place.
get_src "${name}" "${url}"
move_opt "${name}"

# Link every jar of the distribution; -f replaces links left by an earlier
# install.
ln -sf "${OPT_DIR}/${name}/lib/"*.jar "${OPT_JAR_DIR}/"

clean_build
@@ -1,6 +1,5 @@
1
1
  require 'rbbt/util/misc'
2
2
 
3
-
4
3
  plural = Proc.new do |t| t.sub(/s$/,'') end
5
4
 
6
5
  tokens do
@@ -14,7 +13,7 @@ tokens do
14
13
 
15
14
  # Some words for removal
16
15
  stopword do |w| $stopwords.include?( w.downcase_first) end
17
- gene /genes?/i
16
+ gene /genes?/i
18
17
  dna
19
18
  cdna
20
19
  rna
@@ -0,0 +1,66 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
2
+ require 'rbbt/entity'
3
+ require 'rbbt/entity/pmid'
4
+ require 'rbbt/entity/document'
5
+ require 'test/unit'
6
+
7
+ require 'rbbt/workflow'
8
+
9
+ Workflow.require_workflow "TextMining"
10
+
11
# Test-only additions to Document: a corpus stored under /tmp and two
# gene-mention properties that run the TextMining
# :gene_mention_recognition job and attach the document id to every
# resulting entity via SegmentWithDocid.setup.
module Document
  self.corpus = Persist.open_tokyocabinet("/tmp/corpus", false, :string, "BDB")

  # Gene mentions found with the :banner method.
  # Optional arguments: normalize, organism.
  property :banner => :single do |*args|
    normalize, organism = args
    TextMining.job(:gene_mention_recognition, "Factoid", :text => text, :method => :banner, :normalize => normalize, :organism => organism).exec.each{|e| SegmentWithDocid.setup(e, self.docid)}
  end

  # Gene mentions found with the :abner method.
  # Optional arguments: normalize, organism.
  # The job previously ran with :method => :banner, which made this
  # property a duplicate of :banner.
  property :abner => :single do |*args|
    normalize, organism = args
    TextMining.job(:gene_mention_recognition, "Factoid", :text => text, :method => :abner, :normalize => normalize, :organism => organism).exec.each{|e| SegmentWithDocid.setup(e, self.docid)}
  end

  persist :abner, :annotations, :dir => Rbbt.tmp.test.find(:user).entity_property
end
26
+
27
# Exercises PMID and free-text documents together with the :abner
# property defined on Document in this file.
class TestDocument < Test::Unit::TestCase
  # A PMID entity exposes an id starting with "PMID" and a text that
  # mentions TET2.
  def test_pmid
    pmid = "21904853"
    PMID.setup(pmid)

    assert_match(/^PMID/, pmid.id)
    assert_match(/TET2/, pmid.text)
  end

  # Masking a mention keeps its range, and unmasking returns an equal value.
  def test_abner
    pmid = "21904853"
    PMID.setup(pmid)

    # Only mentions with a known offset are checked.
    genes = pmid.abner.reject{|ne| ne.offset.nil?}
    genes.each do |ne|
      orig_range = ne.range
      ne.mask
      assert ne.masked?
      assert_match(/^MASKED/, ne)
      assert_equal orig_range, ne.range
      assert_equal ne, ne.unmask
    end
    assert pmid.abner.include?("TET2")
  end

  # Free text set up as a Document can be recovered through its docid.
  def test_free_text
    text = "Free text including a mention to TET2."
    Document.setup(text)

    assert text.abner.include?("TET2")

    docid = text.docid
    assert_match(/TET2/, Document.setup(docid).text)

    assert Document.setup(docid).abner.include?("TET2")
  end
end
65
+
66
+
@@ -74,6 +74,16 @@ class TestClass < Test::Unit::TestCase
74
74
  assert_equal original.gsub(/TP53/, 'GN'), a
75
75
  end
76
76
 
77
+ Transformed.with_transform(a, [gene1], "GN") do
78
+ Transformed.with_transform(a, [gene2], "LONG_GENE_PLACEHOLDER") do
79
+ assert_equal original.gsub(/TP53/, 'GN').sub('CDK5R1', "LONG_GENE_PLACEHOLDER"), a
80
+ end
81
+ assert_equal original.gsub(/TP53/, 'GN'), a
82
+ end
83
+
84
+ assert_equal original, a
85
+
86
+
77
87
  assert_equal original, a
78
88
 
79
89
  exp1, exp2 = nil, nil
@@ -0,0 +1,34 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
2
+ require 'rbbt/entity'
3
+ require 'rbbt/ner/finder'
4
+ require 'rbbt/ner/finder'
5
+ require 'rbbt/sources/organism'
6
+ require 'rbbt/sources/kegg'
7
+ require 'rbbt/sources/NCI'
8
+
9
# Tests for Finder. The two methods whose names start with an underscore
# do not begin with "test", so they are presumably left out of the run on
# purpose -- NOTE(review): confirm they are meant to stay disabled.
class TestFinder < Test::Unit::TestCase

  # Instances built from KEGG pathways report a namespace and a format.
  def _test_namespace_and_format
    #f = Finder.new(CMD.cmd("head -n 1000", :in => Open.open(Organism.identifiers("Hsa/jun2011").find)))
    finder = Finder.new(KEGG.pathways, :grep => "^hsa")
    assert_equal("Hsa/jun2011", finder.instances.first.namespace)
    assert_equal("Ensembl Gene ID", finder.instances.first.format)
  end

  # Looking up SF3B1 in the organism lexicon yields its Ensembl gene id.
  def _test_find
    finder = Finder.new(Organism.lexicon("Hsa/jun2011"), :grep => ["SF3B1"])

    assert_equal("ENSG00000115524", finder.find("SF3B1").first)
    return unless defined? Entity

    # NOTE(review): ddd looks like a debugging print left in the test.
    ddd finder.find("SF3B1").first.info
    assert_equal("Ensembl Gene ID", finder.find("SF3B1").first.format)
  end

  # NOTE(review): this test only prints the info of each match through ddd
  # and makes no assertion.
  def test_find
    finder = Finder.new(Organism.lexicon("Hsa/jun2011"), :grep => ["RASGRF2"])

    ddd(finder.find("RAS").collect { |match| match.info })
  end

end
@@ -0,0 +1,16 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
2
+ require 'rbbt/ner/linnaeus'
3
+ require 'test/unit'
4
+
5
# Checks that Linnaeus finds the species mentions in a short sentence.
class TestLinnaeus < Test::Unit::TestCase

  def test_match
    # Only the call into Linnaeus is tolerated to fail. The assertions stay
    # outside the rescue: a bare rescue around them can swallow assertion
    # failures and let the test pass whatever was matched.
    begin
      mentions = Linnaeus.match("Human HeLa cells and murine models")
    rescue StandardError => e
      warn "Linnaeus.match failed, assertions skipped: #{e.message}"
      return
    end

    ["Human", "HeLa cells", "murine"].each do |mention|
      assert(mentions.include?(mention), "expected mention #{mention.inspect}")
    end
  end
end
@@ -3,6 +3,7 @@ require 'rbbt/ner/ngram_prefix_dictionary'
3
3
  require 'rbbt/util/tmpfile'
4
4
 
5
5
  class TestNGramPrefixDictionary < Test::Unit::TestCase
6
+
6
7
  def test_match
7
8
  lexicon =<<-EOF
8
9
  C1;aa;AA;bb b
@@ -17,6 +18,27 @@ C2;11;22;3 3;bb
17
18
  end
18
19
  end
19
20
 
21
# With the third constructor argument set to true, the upper-case query
# "AA" matches the lexicon entry "aa" (code C1), both inside a longer text
# and on its own.
def test_case_insensitive_match
  lexicon =<<-EOF
C1;aa
C2;bb
  EOF

  TmpFile.with_file(lexicon) do |file|
    index = NGramPrefixDictionary.new(TSV.open(file, :flat, :sep => ';'), "test", true)

    ['AA oo', 'AA'].each do |query|
      matches = index.match(query)
      assert matches.select{|m| m.code.include? 'C1'}.any?
      assert matches.include? 'AA'
    end
  end
end
40
+
41
+
20
42
  def test_stream
21
43
  lexicon =<<-EOF
22
44
  C1;aa;AA;bb b
@@ -8,9 +8,9 @@ class TestOSCAR4 < Test::Unit::TestCase
8
8
  def test_match
9
9
  begin
10
10
  ner = OSCAR4.new
11
- str = "Alternatively, rearrangement of O-(w-haloalkyl)esters 34 of 2-carboethoxy-N-hydroxypyridine-2-selone affords azonianaphthalenium halides 37 in 79% yield"
11
+ str = "Alternatively, CO2 rearrangement of O-(w-haloalkyl)esters 34 of 2-carboethoxy-N-hydroxypyridine-2-selone affords azonianaphthalenium halides 37 in 79% yield"
12
12
 
13
- mentions = ner.match(str, "CM", false)
13
+ mentions = ner.match(str, "CM")
14
14
  good_mentions = ["2-carboethoxy-N-hydroxypyridine-2-selone", "O-(w-haloalkyl)esters"]
15
15
 
16
16
  good_mentions.each{|mention|
@@ -22,7 +22,7 @@ class TestOSCAR4 < Test::Unit::TestCase
22
22
  end
23
23
  end
24
24
 
25
- def test_ranges
25
+ def _test_ranges
26
26
  begin
27
27
  ner = OSCAR4.new
28
28
  str =<<-EOF
@@ -25,9 +25,9 @@ S000000376 AAA GENE1 DDD
25
25
 
26
26
  def test_match
27
27
  assert_equal(["S000000029"], @norm.match("FUN21"))
28
- assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN"))
29
- assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 2"))
30
- assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 21"))
28
+ assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN").sort)
29
+ assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 2").sort)
30
+ assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 21").sort)
31
31
  assert_equal([], @norm.match("GER4"))
32
32
 
33
33
  @norm.match("FUN21")
@@ -0,0 +1,45 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/nlp/open_nlp/sentence_splitter'
3
+ require 'rbbt/ner/segment'
4
+
5
+ $text=<<-EOF
6
+ Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors
7
+ of early childhood poorly responding to therapy. The majority of cases show
8
+ inactivation of SMARCB1 (INI1, hSNF5, BAF47), a core member of the adenosine
9
+ triphosphate (ATP)-dependent SWI/SNF chromatin-remodeling complex. We here
10
+ report the case of a supratentorial AT/RT in a 9-month-old boy, which showed
11
+ retained SMARCB1 staining on immunohistochemistry and lacked genetic
12
+ alterations of SMARCB1. Instead, the tumor showed loss of protein expression of
13
+ another SWI/SNF chromatin-remodeling complex member, the ATPase subunit SMARCA4
14
+ (BRG1) due to a homozygous SMARCA4 mutation [c.2032C>T (p.Q678X)]. Our
15
+ findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
16
+ AT/RT and the usefulness of antibodies directed against SMARCA4 in this
17
+ diagnostic setting.
18
+ EOF
19
+
20
# Tests for OpenNLP.sentence_splitter.
# NOTE(review): the heredoc and the multi-line string literal below are
# whitespace-sensitive; this rendering may have lost leading and trailing
# spaces, so the lines are left exactly as shown.
+ class TestClass < Test::Unit::TestCase
21
+
# Expects five sentences from the sample text, the fourth one spanning a
# line break. The expected value "This is a \nsentence." has a space before
# the newline, so the heredoc line "This is a" must end with a trailing
# space in the real file.
22
+ def test_sentences
23
+ text =<<-EOF
24
+ This is a sentence.
25
+ A funky character ™ in a sentence.
26
+ This is a sentence.
27
+ This is a
28
+ sentence. This is
29
+ another sentence.
30
+ EOF
31
+
32
+ assert_equal 5, OpenNLP.sentence_splitter(text).length
33
+ assert_equal "This is a \nsentence.", OpenNLP.sentence_splitter(text)[3]
34
+ end
35
+
# Runs the splitter on $text inside Misc.benchmark(100) -- presumably 100
# repetitions, TODO confirm.
# NOTE(review): the result of include? is discarded, so this test makes no
# assertion about the returned sentences.
36
+ def test_text_sentences
37
+ Misc.benchmark(100) do
38
+ OpenNLP.sentence_splitter($text).include? "Our
39
+ findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
40
+ AT/RT and the usefulness of antibodies directed against SMARCA4 in this
41
+ diagnostic setting."
42
+ end
43
+ end
44
+ end
45
+
data/test/test_helper.rb CHANGED
@@ -9,7 +9,7 @@ require 'rbbt/util/log'
9
9
  require 'rbbt/corpus/document_repo'
10
10
 
11
11
  class Test::Unit::TestCase
12
- def test_datafile(file)
12
+ def get_test_datafile(file)
13
13
  File.join(File.dirname(__FILE__), 'data', file)
14
14
  end
15
15
 
metadata CHANGED
@@ -1,103 +1,104 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
- version: !ruby/object:Gem::Version
4
- hash: 1
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 6
9
- - 3
10
- version: 0.6.3
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Miguel Vazquez
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-02-09 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2012-12-21 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: rbbt-util
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- hash: 63
29
- segments:
30
- - 4
31
- - 0
32
- - 0
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
33
21
  version: 4.0.0
34
22
  type: :runtime
35
- version_requirements: *id001
36
- - !ruby/object:Gem::Dependency
37
- name: stemmer
38
23
  prerelease: false
39
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 4.0.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: stemmer
32
+ requirement: !ruby/object:Gem::Requirement
40
33
  none: false
41
- requirements:
42
- - - ">="
43
- - !ruby/object:Gem::Version
44
- hash: 3
45
- segments:
46
- - 0
47
- version: "0"
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
48
38
  type: :runtime
49
- version_requirements: *id002
50
- - !ruby/object:Gem::Dependency
51
- name: libxml-ruby
52
39
  prerelease: false
53
- requirement: &id003 !ruby/object:Gem::Requirement
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: libxml-ruby
48
+ requirement: !ruby/object:Gem::Requirement
54
49
  none: false
55
- requirements:
56
- - - ">="
57
- - !ruby/object:Gem::Version
58
- hash: 3
59
- segments:
60
- - 0
61
- version: "0"
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
62
54
  type: :runtime
63
- version_requirements: *id003
64
- - !ruby/object:Gem::Dependency
65
- name: json
66
55
  prerelease: false
67
- requirement: &id004 !ruby/object:Gem::Requirement
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: json
64
+ requirement: !ruby/object:Gem::Requirement
68
65
  none: false
69
- requirements:
70
- - - ">="
71
- - !ruby/object:Gem::Version
72
- hash: 3
73
- segments:
74
- - 0
75
- version: "0"
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
76
70
  type: :runtime
77
- version_requirements: *id004
78
- - !ruby/object:Gem::Dependency
79
- name: rjb
80
71
  prerelease: false
81
- requirement: &id005 !ruby/object:Gem::Requirement
72
+ version_requirements: !ruby/object:Gem::Requirement
82
73
  none: false
83
- requirements:
84
- - - ">="
85
- - !ruby/object:Gem::Version
86
- hash: 3
87
- segments:
88
- - 0
89
- version: "0"
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: rjb
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
90
86
  type: :runtime
91
- version_requirements: *id005
92
- description: "Text mining tools: named entity recognition and normalization, document classification, bag-of-words, dictionaries, etc"
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ description: ! 'Text mining tools: named entity recognition and normalization, document
95
+ classification, bag-of-words, dictionaries, etc'
93
96
  email: miguel.vazquez@fdi.ucm.es
94
- executables:
97
+ executables:
95
98
  - get_ppis.rb
96
99
  extensions: []
97
-
98
100
  extra_rdoc_files: []
99
-
100
- files:
101
+ files:
101
102
  - lib/rbbt/bow/bow.rb
102
103
  - lib/rbbt/bow/dictionary.rb
103
104
  - lib/rbbt/bow/misc.rb
@@ -110,6 +111,8 @@ files:
110
111
  - lib/rbbt/ner/abner.rb
111
112
  - lib/rbbt/ner/banner.rb
112
113
  - lib/rbbt/ner/chemical_tagger.rb
114
+ - lib/rbbt/ner/finder.rb
115
+ - lib/rbbt/ner/linnaeus.rb
113
116
  - lib/rbbt/ner/ngram_prefix_dictionary.rb
114
117
  - lib/rbbt/ner/oscar3.rb
115
118
  - lib/rbbt/ner/oscar4.rb
@@ -119,6 +122,7 @@ files:
119
122
  - lib/rbbt/ner/rnorm/cue_index.rb
120
123
  - lib/rbbt/ner/rnorm/tokens.rb
121
124
  - lib/rbbt/ner/segment.rb
125
+ - lib/rbbt/ner/segment/docid.rb
122
126
  - lib/rbbt/ner/segment/named_entity.rb
123
127
  - lib/rbbt/ner/segment/relationship.rb
124
128
  - lib/rbbt/ner/segment/segmented.rb
@@ -127,13 +131,16 @@ files:
127
131
  - lib/rbbt/ner/token_trieNER.rb
128
132
  - lib/rbbt/nlp/genia/sentence_splitter.rb
129
133
  - lib/rbbt/nlp/nlp.rb
134
+ - lib/rbbt/nlp/open_nlp/sentence_splitter.rb
130
135
  - share/install/software/ABNER
131
136
  - share/install/software/BANNER
132
137
  - share/install/software/ChemicalTagger
133
138
  - share/install/software/Gdep
134
139
  - share/install/software/Geniass
140
+ - share/install/software/Linnaeus
135
141
  - share/install/software/OSCAR3
136
142
  - share/install/software/OSCAR4
143
+ - share/install/software/OpenNLP
137
144
  - share/install/software/StanfordParser
138
145
  - share/patterns/drug_induce_disease
139
146
  - share/rnorm/cue_default
@@ -157,44 +164,37 @@ files:
157
164
  - test/rbbt/ner/test_oscar4.rb
158
165
  - test/rbbt/ner/test_chemical_tagger.rb
159
166
  - test/rbbt/ner/test_ngram_prefix_dictionary.rb
167
+ - test/rbbt/ner/test_finder.rb
168
+ - test/rbbt/ner/test_linnaeus.rb
169
+ - test/rbbt/entity/test_document.rb
160
170
  - test/rbbt/nlp/test_nlp.rb
161
- - test/rbbt/corpus/test_corpus.rb
162
- - test/rbbt/corpus/test_document.rb
171
+ - test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
163
172
  - bin/get_ppis.rb
164
173
  homepage: http://github.com/mikisvaz/rbbt-util
165
174
  licenses: []
166
-
167
175
  post_install_message:
168
176
  rdoc_options: []
169
-
170
- require_paths:
177
+ require_paths:
171
178
  - lib
172
- required_ruby_version: !ruby/object:Gem::Requirement
179
+ required_ruby_version: !ruby/object:Gem::Requirement
173
180
  none: false
174
- requirements:
175
- - - ">="
176
- - !ruby/object:Gem::Version
177
- hash: 3
178
- segments:
179
- - 0
180
- version: "0"
181
- required_rubygems_version: !ruby/object:Gem::Requirement
181
+ requirements:
182
+ - - ! '>='
183
+ - !ruby/object:Gem::Version
184
+ version: '0'
185
+ required_rubygems_version: !ruby/object:Gem::Requirement
182
186
  none: false
183
- requirements:
184
- - - ">="
185
- - !ruby/object:Gem::Version
186
- hash: 3
187
- segments:
188
- - 0
189
- version: "0"
187
+ requirements:
188
+ - - ! '>='
189
+ - !ruby/object:Gem::Version
190
+ version: '0'
190
191
  requirements: []
191
-
192
192
  rubyforge_project:
193
- rubygems_version: 1.8.10
193
+ rubygems_version: 1.8.24
194
194
  signing_key:
195
195
  specification_version: 3
196
196
  summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
197
- test_files:
197
+ test_files:
198
198
  - test/test_helper.rb
199
199
  - test/rbbt/bow/test_bow.rb
200
200
  - test/rbbt/bow/test_dictionary.rb
@@ -213,6 +213,8 @@ test_files:
213
213
  - test/rbbt/ner/test_oscar4.rb
214
214
  - test/rbbt/ner/test_chemical_tagger.rb
215
215
  - test/rbbt/ner/test_ngram_prefix_dictionary.rb
216
+ - test/rbbt/ner/test_finder.rb
217
+ - test/rbbt/ner/test_linnaeus.rb
218
+ - test/rbbt/entity/test_document.rb
216
219
  - test/rbbt/nlp/test_nlp.rb
217
- - test/rbbt/corpus/test_corpus.rb
218
- - test/rbbt/corpus/test_document.rb
220
+ - test/rbbt/nlp/open_nlp/test_sentence_splitter.rb