rbbt-text 0.6.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -101,7 +101,7 @@ module NLP
101
101
  a, b, d, c = $1, $2, $3, $4
102
102
  events << eventCount.to_s << "\t"
103
103
  events << returnFeatures(a, b, c)
104
- (" " + a + b + "__" + eventCount.to_s + "____" + d + "__" + c + " ")
104
+ (" " << a << b << "__" << eventCount.to_s << "____" << d << "__" << c << " ")
105
105
  }
106
106
  eventCount += 1
107
107
  end
@@ -111,6 +111,27 @@ module NLP
111
111
  [events, marks]
112
112
  end
113
113
 
114
# Scans +text+ line by line for candidate sentence boundaries: a token that
# ends in one of . ! ? ) ] " followed by spaces and another token, the whole
# thing delimited by single spaces.
#
# For every candidate found:
# * a record "<index>\t" plus the value of returnFeatures(token, closer,
#   next_token) is appended to the events string (returnFeatures is defined
#   elsewhere; its output format is not visible here);
# * the candidate is rewritten inside the line as
#   " <token><closer>__<index>____<spaces>__<next_token> ".
#
# Returns [events, marks], where marks is the concatenation of the rewritten
# lines (the newlines removed by the split are not re-inserted).
def self.event_extraction(text)
  boundary = / ([^ ]+)([.!\?\)\]\"])( +)([^ ]+) /
  events = ""
  marks = ""
  event_index = 0

  text.split(/\n/).each do |line|
    # Re-scan after each rewrite until no candidate is left on this line.
    while (hit = boundary.match(line))
      token, closer, gap, follower = hit.captures

      events << event_index.to_s << "\t"
      events << returnFeatures(token, closer, follower)

      line = "#{hit.pre_match} #{token}#{closer}__#{event_index}____#{gap}__#{follower} #{hit.post_match}"
      event_index += 1
    end

    marks << line
  end

  [events, marks]
end
134
+
114
135
  def self.process_labels(marked_text, labels)
115
136
  out = ""
116
137
 
@@ -0,0 +1,74 @@
1
+ require 'rbbt'
2
+ require 'rjb'
3
+ require 'rbbt/ner/segment'
4
+ require 'rbbt/resource'
5
+
6
# Sentence splitting on top of the OpenNLP Java library, accessed through Rjb.
module OpenNLP
  Rbbt.claim Rbbt.software.opt.OpenNLP, :install, Rbbt.share.install.software.OpenNLP.find

  # NOTE(review): the claimed file is called "da-sent.bin" while the URL points
  # at "de-sent.bin"; one of the two looks like a typo -- confirm which
  # sentence model is actually intended.
  Rbbt.claim Rbbt.software.opt.OpenNLP.models["da-sent.bin"], :url, "http://opennlp.sourceforge.net/models-1.5/de-sent.bin"

  # Maximum number of seconds the forked detection process may run.
  MAX = 5

  @@FileInputStream = Rjb::import('java.io.FileInputStream')
  @@SentenceModel = Rjb::import('opennlp.tools.sentdetect.SentenceModel')
  @@SentenceDetectorME = Rjb::import('opennlp.tools.sentdetect.SentenceDetectorME')

  # Returns the memoized SentenceDetectorME, loading the model file on the
  # first call.
  def self.sentence_split_detector
    @@sentence_split_detector ||= begin
      model_in = @@FileInputStream.new(Rbbt.software.opt.OpenNLP.models["da-sent.bin"].produce.find)

      begin
        model = @@SentenceModel.new(model_in)
      ensure
        # Close the stream even when the model fails to load.
        model_in.close
      end

      @@SentenceDetectorME.new(model)
    end
  end

  # Splits +text+ into sentences.
  #
  # The detection runs in a forked child that writes its result to a
  # temporary file; the child is killed if it runs for more than MAX seconds.
  #
  # Returns an array of sentence strings, each passed through Segment.setup
  # with the position at which it was found in +text+. Returns [] for nil or
  # empty input.
  #
  # Raises RuntimeError when the child exceeds MAX seconds; any other
  # exception propagates unchanged.
  def self.sentence_splitter(text)
    return [] if text.nil? or text.empty?

    detector = self.sentence_split_detector

    sentences = nil
    TmpFile.with_file do |tmpfile|
      start_time = Time.now

      pid = Process.fork do
        sent = detector.sentDetect(text)
        Open.write(tmpfile, sent * "#OpenNLP:SENTENCE#")
      end

      begin
        # WNOHANG makes waitpid return nil while the child is still running;
        # a plain waitpid would block here and the timeout could never fire.
        until Process.waitpid(pid, Process::WNOHANG)
          if Time.now - start_time > MAX
            Process.kill(9, pid)
            begin
              # Reap the killed child so it does not linger as a zombie.
              Process.waitpid(pid)
            rescue Errno::ECHILD
              # Already reaped; nothing left to collect.
            end
            raise "Taking too long (> #{MAX} seconds)"
          end
          sleep 0.1
        end
      rescue Errno::ECHILD
        # The child is no longer ours to wait for; go on and read its output.
      end

      sentences = Open.read(tmpfile).split("#OpenNLP:SENTENCE#")
    end

    # Search for each sentence starting near the end of the previous one so
    # that repeated sentences map to successive positions.
    last = 0
    sentences.collect{|sentence|
      start = text.index(sentence, last)
      Segment.setup sentence, start
      last = start + sentence.length - 1
      sentence
    }
  end
end
@@ -0,0 +1,21 @@
1
#!/bin/bash
# Install recipe for Linnaeus plus its species-proxy entity pack.
#
# install_src, get_src, opt_dir and build_dir, as well as OPT_DIR and
# OPT_JAR_DIR, are not defined in this script; they are expected to be
# provided by the environment that runs it.

name='Linnaeus'
url="http://sourceforge.net/projects/linnaeus/files/Linnaeus/linnaeus-2.0.tar.gz/download"
species_url="http://sourceforge.net/projects/linnaeus/files/Entity_packs/species-proxy-1.2.tar.gz/download"

install_src "$name" "$url"
# -f: replace a link left by a previous install instead of failing on it.
ln -sf "$OPT_DIR/$name/bin/"*.jar "$OPT_JAR_DIR/$name.jar"

get_src "Linnaeus-species-proxy" "$species_url"
pkg_dir="$(opt_dir "$name")"
build_dir="$(build_dir)"
mv "$build_dir" "$pkg_dir"

# Rewrite properties.conf so that its $dir entry points at the installed
# location, dropping the previous "?dir =" line and all comment lines.
properties="$pkg_dir/species-proxy/properties.conf"
# A fresh scratch file per run: a fixed path opened in append mode would carry
# stale lines over from earlier runs.
tmp_file="$(mktemp "${TMPDIR:-/tmp}/species-proxy-properties.XXXXXX")"
grep -v "^.dir =" "$properties" > "$tmp_file"
echo "\$dir = $pkg_dir/species-proxy/" > "$properties"
grep -v "^#" "$tmp_file" >> "$properties"
rm -f "$tmp_file"
21
+
@@ -0,0 +1,12 @@
1
#!/bin/bash
# Install recipe for OpenNLP.
#
# get_src, move_opt and clean_build, as well as OPT_DIR and OPT_JAR_DIR, are
# not defined in this script; they are expected to be provided by the
# environment that runs it.

name='OpenNLP'
url="http://apache.rediris.es//incubator/opennlp/apache-opennlp-1.5.2-incubating-bin.tar.gz"

get_src "$name" "$url"
move_opt "$name"

# Link every jar shipped in the package's lib directory into the jar directory.
jars=("$OPT_DIR/$name/lib/"*.jar)
ln -sf "${jars[@]}" "$OPT_JAR_DIR/"

clean_build
@@ -1,6 +1,5 @@
1
1
  require 'rbbt/util/misc'
2
2
 
3
-
4
3
  plural = Proc.new do |t| t.sub(/s$/,'') end
5
4
 
6
5
  tokens do
@@ -14,7 +13,7 @@ tokens do
14
13
 
15
14
  # Some words for removal
16
15
  stopword do |w| $stopwords.include?( w.downcase_first) end
17
- gene /genes?/i
16
+ gene /genes?/i
18
17
  dna
19
18
  cdna
20
19
  rna
@@ -0,0 +1,66 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
2
+ require 'rbbt/entity'
3
+ require 'rbbt/entity/pmid'
4
+ require 'rbbt/entity/document'
5
+ require 'test/unit'
6
+
7
+ require 'rbbt/workflow'
8
+
9
+ Workflow.require_workflow "TextMining"
10
+
11
# Test-local extension of Document: a corpus stored under /tmp and two gene
# mention properties computed through the TextMining workflow.
module Document
  self.corpus = Persist.open_tokyocabinet("/tmp/corpus", false, :string, "BDB")

  # Gene mentions of the document text; every mention is set up as a
  # SegmentWithDocid carrying this document's docid.
  property :banner => :single do |*args|
    normalize, organism = args
    job = TextMining.job(:gene_mention_recognition, "Factoid",
                         :text => text, :method => :banner,
                         :normalize => normalize, :organism => organism)
    job.exec.each do |mention|
      SegmentWithDocid.setup(mention, self.docid)
    end
  end

  # NOTE(review): this property is named :abner but requests
  # :method => :banner, exactly like the property above -- looks like a
  # copy/paste slip; confirm whether :abner was intended.
  property :abner => :single do |*args|
    normalize, organism = args
    job = TextMining.job(:gene_mention_recognition, "Factoid",
                         :text => text, :method => :banner,
                         :normalize => normalize, :organism => organism)
    job.exec.each do |mention|
      SegmentWithDocid.setup(mention, self.docid)
    end
  end

  persist :abner, :annotations, :dir => Rbbt.tmp.test.find(:user).entity_property
end
26
+
27
# Exercises the Document/PMID entities together with the :abner property.
class TestDocument < Test::Unit::TestCase
  # A PMID entity exposes an id starting with "PMID" and a text mentioning TET2.
  def test_pmid
    pmid = "21904853"
    PMID.setup(pmid)

    assert_match(/^PMID/, pmid.id)
    assert_match(/TET2/, pmid.text)
  end

  # Masking a mention keeps its range, and unmasking compares equal to it.
  def test_abner
    pmid = "21904853"
    PMID.setup(pmid)

    located = pmid.abner.reject { |mention| mention.offset.nil? }
    located.each do |mention|
      range_before = mention.range
      mention.mask
      assert mention.masked?
      assert mention =~ /^MASKED/
      assert_equal range_before, mention.range
      assert_equal mention, mention.unmask
    end

    assert pmid.abner.include?("TET2")
  end

  # Free text set up as a Document can be annotated directly and again after
  # being looked up through its docid.
  def test_free_text
    text = "Free text including a mention to TET2."
    Document.setup(text)

    assert text.abner.include?("TET2")

    docid = text.docid
    assert_match(/TET2/, Document.setup(docid).text)

    assert Document.setup(docid).abner.include?("TET2")
  end
end
65
+
66
+
@@ -74,6 +74,16 @@ class TestClass < Test::Unit::TestCase
74
74
  assert_equal original.gsub(/TP53/, 'GN'), a
75
75
  end
76
76
 
77
+ Transformed.with_transform(a, [gene1], "GN") do
78
+ Transformed.with_transform(a, [gene2], "LONG_GENE_PLACEHOLDER") do
79
+ assert_equal original.gsub(/TP53/, 'GN').sub('CDK5R1', "LONG_GENE_PLACEHOLDER"), a
80
+ end
81
+ assert_equal original.gsub(/TP53/, 'GN'), a
82
+ end
83
+
84
+ assert_equal original, a
85
+
86
+
77
87
  assert_equal original, a
78
88
 
79
89
  exp1, exp2 = nil, nil
@@ -0,0 +1,34 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
2
+ require 'rbbt/entity'
3
+ require 'rbbt/ner/finder'
4
+ require 'rbbt/ner/finder'
5
+ require 'rbbt/sources/organism'
6
+ require 'rbbt/sources/kegg'
7
+ require 'rbbt/sources/NCI'
8
+
9
# Tests for Finder. The methods prefixed with an underscore do not start with
# "test", so Test::Unit does not run them.
class TestFinder < Test::Unit::TestCase

  def _test_namespace_and_format
    # Alternative source kept for reference:
    # Finder.new(CMD.cmd("head -n 1000", :in => Open.open(Organism.identifiers("Hsa/jun2011").find)))
    finder = Finder.new(KEGG.pathways, :grep => "^hsa")
    first_instance = finder.instances.first
    assert_equal "Hsa/jun2011", first_instance.namespace
    assert_equal "Ensembl Gene ID", finder.instances.first.format
  end

  def _test_find
    finder = Finder.new(Organism.lexicon("Hsa/jun2011"), :grep => ["SF3B1"])

    assert_equal "ENSG00000115524", finder.find("SF3B1").first
    if defined? Entity
      ddd finder.find("SF3B1").first.info
      assert_equal "Ensembl Gene ID", finder.find("SF3B1").first.format
    end
  end

  # NOTE(review): this test only prints the match info through ddd and makes
  # no assertion.
  def test_find
    finder = Finder.new(Organism.lexicon("Hsa/jun2011"), :grep => ["RASGRF2"])

    ddd finder.find("RAS").collect { |match| match.info }
  end

end
@@ -0,0 +1,16 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
2
+ require 'rbbt/ner/linnaeus'
3
+ require 'test/unit'
4
+
5
# Tests for the Linnaeus species tagger.
class TestLinnaeus < Test::Unit::TestCase

  # The known species mentions must be among the matches.
  def test_match
    begin
      mentions = Linnaeus.match("Human HeLa cells and murine models")
    rescue
      # Presumably tolerates environments where Linnaeus cannot run (TODO
      # confirm); the test is abandoned without failing in that case.
      return
    end

    # Assertions stay outside the rescue: wrapping them in a bare rescue would
    # swallow assertion failures whenever they are StandardErrors, and the
    # test could never fail.
    ["Human", "HeLa cells", "murine"].each do |mention|
      assert(mentions.include?(mention))
    end
  end
end
@@ -3,6 +3,7 @@ require 'rbbt/ner/ngram_prefix_dictionary'
3
3
  require 'rbbt/util/tmpfile'
4
4
 
5
5
  class TestNGramPrefixDictionary < Test::Unit::TestCase
6
+
6
7
  def test_match
7
8
  lexicon =<<-EOF
8
9
  C1;aa;AA;bb b
@@ -17,6 +18,27 @@ C2;11;22;3 3;bb
17
18
  end
18
19
  end
19
20
 
21
# Upper-case queries must match the lower-case lexicon entry "aa" (code C1)
# when the dictionary is built with its third argument set to true
# (presumably the case-insensitive switch -- confirm against
# NGramPrefixDictionary), both with and without trailing text.
def test_case_insensitive_match
  lexicon =<<-EOF
C1;aa
C2;bb
  EOF

  TmpFile.with_file(lexicon) do |file|
    index = NGramPrefixDictionary.new(TSV.open(file, :flat, :sep => ';'), "test", true)

    ['AA oo', 'AA'].each do |query|
      matches = index.match(query)
      assert matches.any? { |match| match.code.include?('C1') }
      assert matches.include?('AA')
    end
  end
end
40
+
41
+
20
42
  def test_stream
21
43
  lexicon =<<-EOF
22
44
  C1;aa;AA;bb b
@@ -8,9 +8,9 @@ class TestOSCAR4 < Test::Unit::TestCase
8
8
  def test_match
9
9
  begin
10
10
  ner = OSCAR4.new
11
- str = "Alternatively, rearrangement of O-(w-haloalkyl)esters 34 of 2-carboethoxy-N-hydroxypyridine-2-selone affords azonianaphthalenium halides 37 in 79% yield"
11
+ str = "Alternatively, CO2 rearrangement of O-(w-haloalkyl)esters 34 of 2-carboethoxy-N-hydroxypyridine-2-selone affords azonianaphthalenium halides 37 in 79% yield"
12
12
 
13
- mentions = ner.match(str, "CM", false)
13
+ mentions = ner.match(str, "CM")
14
14
  good_mentions = ["2-carboethoxy-N-hydroxypyridine-2-selone", "O-(w-haloalkyl)esters"]
15
15
 
16
16
  good_mentions.each{|mention|
@@ -22,7 +22,7 @@ class TestOSCAR4 < Test::Unit::TestCase
22
22
  end
23
23
  end
24
24
 
25
- def test_ranges
25
+ def _test_ranges
26
26
  begin
27
27
  ner = OSCAR4.new
28
28
  str =<<-EOF
@@ -25,9 +25,9 @@ S000000376 AAA GENE1 DDD
25
25
 
26
26
  def test_match
27
27
  assert_equal(["S000000029"], @norm.match("FUN21"))
28
- assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN"))
29
- assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 2"))
30
- assert_equal(["S000000030", "S000000029", "S000000031"], @norm.match("FUN 21"))
28
+ assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN").sort)
29
+ assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 2").sort)
30
+ assert_equal(["S000000030", "S000000029", "S000000031"].sort, @norm.match("FUN 21").sort)
31
31
  assert_equal([], @norm.match("GER4"))
32
32
 
33
33
  @norm.match("FUN21")
@@ -0,0 +1,45 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/nlp/open_nlp/sentence_splitter'
3
+ require 'rbbt/ner/segment'
4
+
5
+ $text=<<-EOF
6
+ Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors
7
+ of early childhood poorly responding to therapy. The majority of cases show
8
+ inactivation of SMARCB1 (INI1, hSNF5, BAF47), a core member of the adenosine
9
+ triphosphate (ATP)-dependent SWI/SNF chromatin-remodeling complex. We here
10
+ report the case of a supratentorial AT/RT in a 9-month-old boy, which showed
11
+ retained SMARCB1 staining on immunohistochemistry and lacked genetic
12
+ alterations of SMARCB1. Instead, the tumor showed loss of protein expression of
13
+ another SWI/SNF chromatin-remodeling complex member, the ATPase subunit SMARCA4
14
+ (BRG1) due to a homozygous SMARCA4 mutation [c.2032C>T (p.Q678X)]. Our
15
+ findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
16
+ AT/RT and the usefulness of antibodies directed against SMARCA4 in this
17
+ diagnostic setting.
18
+ EOF
19
+
20
# Tests for OpenNLP.sentence_splitter.
class TestClass < Test::Unit::TestCase

  # Sentences may span line breaks; the splitter returns them with their
  # original whitespace.
  def test_sentences
    # Built line by line so that the space after "This is a" -- required by
    # the expectation below -- is visible and cannot be lost to
    # trailing-whitespace stripping.
    text = [
      "This is a sentence.",
      "A funky character ™ in a sentence.",
      "This is a sentence.",
      "This is a ",
      "sentence. This is",
      "another sentence.",
    ].join("\n") + "\n"

    sentences = OpenNLP.sentence_splitter(text)

    assert_equal 5, sentences.length
    assert_equal "This is a \nsentence.", sentences[3]
  end

  # Runs the splitter on the abstract in $text inside Misc.benchmark(100).
  # NOTE(review): the result of include? is discarded, so this test asserts
  # nothing about the output.
  def test_text_sentences
    last_sentence = "Our\n" \
      "findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive\n" \
      "AT/RT and the usefulness of antibodies directed against SMARCA4 in this\n" \
      "diagnostic setting."

    Misc.benchmark(100) do
      OpenNLP.sentence_splitter($text).include? last_sentence
    end
  end
end
45
+
data/test/test_helper.rb CHANGED
@@ -9,7 +9,7 @@ require 'rbbt/util/log'
9
9
  require 'rbbt/corpus/document_repo'
10
10
 
11
11
  class Test::Unit::TestCase
12
- def test_datafile(file)
12
+ def get_test_datafile(file)
13
13
  File.join(File.dirname(__FILE__), 'data', file)
14
14
  end
15
15
 
metadata CHANGED
@@ -1,103 +1,104 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
- version: !ruby/object:Gem::Version
4
- hash: 1
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
5
  prerelease:
6
- segments:
7
- - 0
8
- - 6
9
- - 3
10
- version: 0.6.3
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Miguel Vazquez
14
9
  autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
-
18
- date: 2012-02-09 00:00:00 Z
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2012-12-21 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: rbbt-util
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: !ruby/object:Gem::Requirement
24
17
  none: false
25
- requirements:
26
- - - ">="
27
- - !ruby/object:Gem::Version
28
- hash: 63
29
- segments:
30
- - 4
31
- - 0
32
- - 0
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
33
21
  version: 4.0.0
34
22
  type: :runtime
35
- version_requirements: *id001
36
- - !ruby/object:Gem::Dependency
37
- name: stemmer
38
23
  prerelease: false
39
- requirement: &id002 !ruby/object:Gem::Requirement
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 4.0.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: stemmer
32
+ requirement: !ruby/object:Gem::Requirement
40
33
  none: false
41
- requirements:
42
- - - ">="
43
- - !ruby/object:Gem::Version
44
- hash: 3
45
- segments:
46
- - 0
47
- version: "0"
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
48
38
  type: :runtime
49
- version_requirements: *id002
50
- - !ruby/object:Gem::Dependency
51
- name: libxml-ruby
52
39
  prerelease: false
53
- requirement: &id003 !ruby/object:Gem::Requirement
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: libxml-ruby
48
+ requirement: !ruby/object:Gem::Requirement
54
49
  none: false
55
- requirements:
56
- - - ">="
57
- - !ruby/object:Gem::Version
58
- hash: 3
59
- segments:
60
- - 0
61
- version: "0"
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
62
54
  type: :runtime
63
- version_requirements: *id003
64
- - !ruby/object:Gem::Dependency
65
- name: json
66
55
  prerelease: false
67
- requirement: &id004 !ruby/object:Gem::Requirement
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: json
64
+ requirement: !ruby/object:Gem::Requirement
68
65
  none: false
69
- requirements:
70
- - - ">="
71
- - !ruby/object:Gem::Version
72
- hash: 3
73
- segments:
74
- - 0
75
- version: "0"
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
76
70
  type: :runtime
77
- version_requirements: *id004
78
- - !ruby/object:Gem::Dependency
79
- name: rjb
80
71
  prerelease: false
81
- requirement: &id005 !ruby/object:Gem::Requirement
72
+ version_requirements: !ruby/object:Gem::Requirement
82
73
  none: false
83
- requirements:
84
- - - ">="
85
- - !ruby/object:Gem::Version
86
- hash: 3
87
- segments:
88
- - 0
89
- version: "0"
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: rjb
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ! '>='
84
+ - !ruby/object:Gem::Version
85
+ version: '0'
90
86
  type: :runtime
91
- version_requirements: *id005
92
- description: "Text mining tools: named entity recognition and normalization, document classification, bag-of-words, dictionaries, etc"
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ description: ! 'Text mining tools: named entity recognition and normalization, document
95
+ classification, bag-of-words, dictionaries, etc'
93
96
  email: miguel.vazquez@fdi.ucm.es
94
- executables:
97
+ executables:
95
98
  - get_ppis.rb
96
99
  extensions: []
97
-
98
100
  extra_rdoc_files: []
99
-
100
- files:
101
+ files:
101
102
  - lib/rbbt/bow/bow.rb
102
103
  - lib/rbbt/bow/dictionary.rb
103
104
  - lib/rbbt/bow/misc.rb
@@ -110,6 +111,8 @@ files:
110
111
  - lib/rbbt/ner/abner.rb
111
112
  - lib/rbbt/ner/banner.rb
112
113
  - lib/rbbt/ner/chemical_tagger.rb
114
+ - lib/rbbt/ner/finder.rb
115
+ - lib/rbbt/ner/linnaeus.rb
113
116
  - lib/rbbt/ner/ngram_prefix_dictionary.rb
114
117
  - lib/rbbt/ner/oscar3.rb
115
118
  - lib/rbbt/ner/oscar4.rb
@@ -119,6 +122,7 @@ files:
119
122
  - lib/rbbt/ner/rnorm/cue_index.rb
120
123
  - lib/rbbt/ner/rnorm/tokens.rb
121
124
  - lib/rbbt/ner/segment.rb
125
+ - lib/rbbt/ner/segment/docid.rb
122
126
  - lib/rbbt/ner/segment/named_entity.rb
123
127
  - lib/rbbt/ner/segment/relationship.rb
124
128
  - lib/rbbt/ner/segment/segmented.rb
@@ -127,13 +131,16 @@ files:
127
131
  - lib/rbbt/ner/token_trieNER.rb
128
132
  - lib/rbbt/nlp/genia/sentence_splitter.rb
129
133
  - lib/rbbt/nlp/nlp.rb
134
+ - lib/rbbt/nlp/open_nlp/sentence_splitter.rb
130
135
  - share/install/software/ABNER
131
136
  - share/install/software/BANNER
132
137
  - share/install/software/ChemicalTagger
133
138
  - share/install/software/Gdep
134
139
  - share/install/software/Geniass
140
+ - share/install/software/Linnaeus
135
141
  - share/install/software/OSCAR3
136
142
  - share/install/software/OSCAR4
143
+ - share/install/software/OpenNLP
137
144
  - share/install/software/StanfordParser
138
145
  - share/patterns/drug_induce_disease
139
146
  - share/rnorm/cue_default
@@ -157,44 +164,37 @@ files:
157
164
  - test/rbbt/ner/test_oscar4.rb
158
165
  - test/rbbt/ner/test_chemical_tagger.rb
159
166
  - test/rbbt/ner/test_ngram_prefix_dictionary.rb
167
+ - test/rbbt/ner/test_finder.rb
168
+ - test/rbbt/ner/test_linnaeus.rb
169
+ - test/rbbt/entity/test_document.rb
160
170
  - test/rbbt/nlp/test_nlp.rb
161
- - test/rbbt/corpus/test_corpus.rb
162
- - test/rbbt/corpus/test_document.rb
171
+ - test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
163
172
  - bin/get_ppis.rb
164
173
  homepage: http://github.com/mikisvaz/rbbt-util
165
174
  licenses: []
166
-
167
175
  post_install_message:
168
176
  rdoc_options: []
169
-
170
- require_paths:
177
+ require_paths:
171
178
  - lib
172
- required_ruby_version: !ruby/object:Gem::Requirement
179
+ required_ruby_version: !ruby/object:Gem::Requirement
173
180
  none: false
174
- requirements:
175
- - - ">="
176
- - !ruby/object:Gem::Version
177
- hash: 3
178
- segments:
179
- - 0
180
- version: "0"
181
- required_rubygems_version: !ruby/object:Gem::Requirement
181
+ requirements:
182
+ - - ! '>='
183
+ - !ruby/object:Gem::Version
184
+ version: '0'
185
+ required_rubygems_version: !ruby/object:Gem::Requirement
182
186
  none: false
183
- requirements:
184
- - - ">="
185
- - !ruby/object:Gem::Version
186
- hash: 3
187
- segments:
188
- - 0
189
- version: "0"
187
+ requirements:
188
+ - - ! '>='
189
+ - !ruby/object:Gem::Version
190
+ version: '0'
190
191
  requirements: []
191
-
192
192
  rubyforge_project:
193
- rubygems_version: 1.8.10
193
+ rubygems_version: 1.8.24
194
194
  signing_key:
195
195
  specification_version: 3
196
196
  summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
197
- test_files:
197
+ test_files:
198
198
  - test/test_helper.rb
199
199
  - test/rbbt/bow/test_bow.rb
200
200
  - test/rbbt/bow/test_dictionary.rb
@@ -213,6 +213,8 @@ test_files:
213
213
  - test/rbbt/ner/test_oscar4.rb
214
214
  - test/rbbt/ner/test_chemical_tagger.rb
215
215
  - test/rbbt/ner/test_ngram_prefix_dictionary.rb
216
+ - test/rbbt/ner/test_finder.rb
217
+ - test/rbbt/ner/test_linnaeus.rb
218
+ - test/rbbt/entity/test_document.rb
216
219
  - test/rbbt/nlp/test_nlp.rb
217
- - test/rbbt/corpus/test_corpus.rb
218
- - test/rbbt/corpus/test_document.rb
220
+ - test/rbbt/nlp/open_nlp/test_sentence_splitter.rb