rbbt-text 1.1.7 → 1.1.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/bin/get_ppis.rb +5 -5
- data/lib/rbbt/bow/dictionary.rb +0 -3
- data/lib/rbbt/corpus/document.rb +3 -3
- data/lib/rbbt/corpus/sources/pubmed.rb +2 -1
- data/lib/rbbt/ner/abner.rb +1 -0
- data/lib/rbbt/ner/banner.rb +1 -0
- data/lib/rbbt/ner/brat.rb +30 -0
- data/lib/rbbt/ner/g_norm_plus.rb +80 -0
- data/lib/rbbt/ner/linnaeus.rb +1 -1
- data/lib/rbbt/ner/segment.rb +26 -4
- data/lib/rbbt/ner/segment/named_entity.rb +1 -0
- data/lib/rbbt/ner/segment/relationship.rb +11 -7
- data/lib/rbbt/ner/segment/transformed.rb +44 -33
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +65 -0
- data/lib/rbbt/nlp/nlp.rb +5 -66
- data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +8 -4
- data/share/install/software/GNormPlus +7 -0
- data/share/install/software/Gdep +1 -1
- data/share/install/software/OpenNLP +1 -1
- data/test/rbbt/ner/segment/test_named_entity.rb +24 -1
- data/test/rbbt/ner/segment/test_relationship.rb +0 -0
- data/test/rbbt/ner/segment/test_transformed.rb +72 -2
- data/test/rbbt/ner/test_brat.rb +64 -0
- data/test/rbbt/ner/test_g_norm_plus.rb +16 -0
- data/test/rbbt/ner/test_segment.rb +0 -1
- data/test/rbbt/nlp/genia/test_sentence_splitter.rb +9 -0
- data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +4 -1
- metadata +14 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ea1646b5f32644bb5872f57422534b49955f988df26df4a65c8dda592515eac3
|
4
|
+
data.tar.gz: 3f6bc60546b79c76b6b35840712453616c377fcc088f321e95847f116776bef1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9376c68bad67733b5771b57ead7c962d45ff29c44362d1c51bf3480d3c3bf9f1f75284e40044fc4ed95bd94a03ab0759b3b7320bf1e3da00a0cdd82255c9395c
|
7
|
+
data.tar.gz: cd25a9cd91fde366be195801d45238d555edfc94f2b06391db7db2d9f4781b34dd599514385782d6c7e22af2841c5f3322ba74bf0a3a9c1fdbe308a255f00098
|
data/bin/get_ppis.rb
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'rbbt-util'
|
4
|
-
require 'rbbt/
|
5
|
-
require 'rbbt/
|
6
|
-
require 'rbbt/annotations/relationships/ppi'
|
4
|
+
require 'rbbt/corpus/corpus'
|
5
|
+
require 'rbbt/corpus/sources/pubmed'
|
6
|
+
#require 'rbbt/annotations/relationships/ppi'
|
7
7
|
require 'rbbt/sources/pubmed'
|
8
|
-
require 'rbbt/ner/annotations'
|
8
|
+
#require 'rbbt/ner/annotations'
|
9
9
|
require 'rbbt/ner/token_trieNER'
|
10
|
-
require 'rbbt/ner/annotations/transformed'
|
10
|
+
#require 'rbbt/ner/annotations/transformed'
|
11
11
|
require 'rbbt/ner/chemical_tagger'
|
12
12
|
|
13
13
|
Corpus.define_entity_ner "Compounds", false do |doc|
|
data/lib/rbbt/bow/dictionary.rb
CHANGED
data/lib/rbbt/corpus/document.rb
CHANGED
@@ -8,10 +8,10 @@ require 'json'
|
|
8
8
|
|
9
9
|
class Document
|
10
10
|
|
11
|
-
attr_accessor :text, :docid, :namespace, :id, :type, :hash, :segments, :
|
11
|
+
attr_accessor :text, :docid, :namespace, :id, :type, :hash, :segments, :segment_indices, :persist_dir, :global_persistence
|
12
12
|
def initialize(persist_dir = nil, docid = nil, text = nil, global_persistence = nil)
|
13
13
|
@segments = {}
|
14
|
-
@
|
14
|
+
@segment_indices = {}
|
15
15
|
|
16
16
|
if not persist_dir.nil?
|
17
17
|
@persist_dir = persist_dir
|
@@ -236,7 +236,7 @@ class Document
|
|
236
236
|
end
|
237
237
|
|
238
238
|
def segment_index(name, persist_dir = nil)
|
239
|
-
@
|
239
|
+
@segment_indices[name] ||= Segment.index(self.send(name), persist_dir.nil? ? :memory : File.join(persist_dir, name + '.range'))
|
240
240
|
end
|
241
241
|
|
242
242
|
def load_into(segment, *annotations)
|
@@ -10,8 +10,9 @@ class Corpus
|
|
10
10
|
type = nil if String === type and type.empty?
|
11
11
|
|
12
12
|
PubMed.get_article(pmids).collect do |pmid, article|
|
13
|
+
add_document(article.title, :pubmed, pmid, :title)
|
13
14
|
if (type.nil? and article.pdf_url.nil?) or (not type.nil? and type.to_sym === :abstract)
|
14
|
-
add_document(article.
|
15
|
+
add_document(article.abstract || "", :pubmed, pmid, :abstract)
|
15
16
|
else
|
16
17
|
raise "No FullText available for #{ pmid }" if article.pdf_url.nil?
|
17
18
|
add_document(article.full_text, :pubmed, pmid, :fulltext)
|
data/lib/rbbt/ner/abner.rb
CHANGED
@@ -11,6 +11,7 @@ class Abner < NER
|
|
11
11
|
Rbbt.claim Rbbt.software.opt.ABNER, :install, Rbbt.share.install.software.ABNER.find
|
12
12
|
|
13
13
|
def self.init
|
14
|
+
Rbbt.software.opt.ABNER.produce
|
14
15
|
@@JFile ||= Rjb::import('java.io.File')
|
15
16
|
@@Tagger ||= Rjb::import('abner.Tagger')
|
16
17
|
@@Trainer ||= Rjb::import('abner.Trainer')
|
data/lib/rbbt/ner/banner.rb
CHANGED
@@ -10,6 +10,7 @@ class Banner < NER
|
|
10
10
|
Rbbt.claim Rbbt.software.opt.BANNER, :install, Rbbt.share.install.software.BANNER.find
|
11
11
|
|
12
12
|
def self.init
|
13
|
+
Rbbt.software.opt.BANNER.produce
|
13
14
|
@@JFile ||= Rjb::import('java.io.File')
|
14
15
|
@@SimpleTokenizer ||= Rjb::import('banner.tokenization.SimpleTokenizer')
|
15
16
|
@@CRFTagger ||= Rjb::import('banner.tagging.CRFTagger')
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'rbbt/ner/segment/named_entity'
|
2
|
+
require 'rbbt/ner/segment/relationship'
|
3
|
+
module Brat
|
4
|
+
Rbbt.claim Rbbt.software.opt.Brat, :install, "https://github.com/nlplab/brat.git"
|
5
|
+
|
6
|
+
def self.load(file)
|
7
|
+
entities = {}
|
8
|
+
relationships = {}
|
9
|
+
entity_ids = {}
|
10
|
+
TSV.traverse file, :type => :array do |line|
|
11
|
+
id, info, literal = line.split("\t")
|
12
|
+
case id[0]
|
13
|
+
when "T"
|
14
|
+
type, start, eend = info.split(" ")
|
15
|
+
entities[id] = NamedEntity.setup(literal, :offset => start.to_i, :type => type)
|
16
|
+
when "#"
|
17
|
+
type, id = info.split(" ")
|
18
|
+
entities[id].code = literal unless entities[id].nil?
|
19
|
+
when "R"
|
20
|
+
type, *args = info.split(" ")
|
21
|
+
tf, tg = args.collect{|e| e.split(":").last }
|
22
|
+
tf = entities[tf]
|
23
|
+
tg = entities[tg]
|
24
|
+
relationship = Relationship.setup([tf,tg] * "~" + "#" + type, :terms => [tf,tg], :type => type)
|
25
|
+
relationships[id] = relationship
|
26
|
+
end
|
27
|
+
end
|
28
|
+
[entities.values, relationships.values]
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
module GNormPlus
|
3
|
+
|
4
|
+
Rbbt.claim Rbbt.software.opt.GNormPlus, :install do
|
5
|
+
url = "https://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/tmTools/download/GNormPlus/GNormPlusJava.zip"
|
6
|
+
script =<<-EOF
|
7
|
+
(cd $(opt_dir $name); sh Installation.sh; chmod +x Ab3P identify_abbr)
|
8
|
+
EOF
|
9
|
+
{:src => url, :commands => script}
|
10
|
+
end
|
11
|
+
|
12
|
+
CONFIG =<<-EOF
|
13
|
+
|
14
|
+
#===Annotation
|
15
|
+
#Attribution setting:
|
16
|
+
#FocusSpecies = Taxonomy ID
|
17
|
+
# All: All species
|
18
|
+
# 9606: Human
|
19
|
+
# 4932: yeast
|
20
|
+
# 7227: Fly
|
21
|
+
# 10090: Mouse
|
22
|
+
# 10116: Rat
|
23
|
+
# 7955: Zebrafish
|
24
|
+
# 3702: Arabidopsis thaliana
|
25
|
+
#open: True
|
26
|
+
#close: False
|
27
|
+
|
28
|
+
[Focus Species]
|
29
|
+
FocusSpecies = All
|
30
|
+
[Dictionary & Model]
|
31
|
+
DictionaryFolder = ./Dictionary
|
32
|
+
GNRModel = ./Dictionary/GNR.Model
|
33
|
+
SCModel = ./Dictionary/SimConcept.Model
|
34
|
+
GeneIDMatch = True
|
35
|
+
Normalization2Protein = False
|
36
|
+
DeleteTmp = True
|
37
|
+
EOF
|
38
|
+
|
39
|
+
def self.process(texts)
|
40
|
+
TmpFile.with_file do |tmpdir|
|
41
|
+
Open.mkdir tmpdir
|
42
|
+
Misc.in_dir tmpdir do
|
43
|
+
Open.ln_s Rbbt.software.opt.GNormPlus.Dictionary.find, '.'
|
44
|
+
Open.ln_s Rbbt.software.opt.GNormPlus["BioC.dtd"].find, '.'
|
45
|
+
Open.ln_s Rbbt.software.opt.GNormPlus["Ab3P"].find, '.'
|
46
|
+
Open.ln_s Rbbt.software.opt.GNormPlus["CRF"].find, '.'
|
47
|
+
Open.mkdir 'input'
|
48
|
+
Open.mkdir 'output'
|
49
|
+
Open.mkdir 'tmp'
|
50
|
+
|
51
|
+
texts.each do |name,text|
|
52
|
+
Open.write("input/#{name}.txt") do |f|
|
53
|
+
f.puts "#{name}|a|" << text
|
54
|
+
f.puts
|
55
|
+
end
|
56
|
+
end
|
57
|
+
Open.write('config', CONFIG)
|
58
|
+
CMD.cmd_log("java -Xmx20G -Xms20G -jar '#{Rbbt.software.opt.GNormPlus.find}/GNormPlus.jar' 'input' 'output' 'config'")
|
59
|
+
|
60
|
+
if texts.respond_to? :key_field
|
61
|
+
key_field = texts.key_field
|
62
|
+
else
|
63
|
+
key_field = "ID"
|
64
|
+
end
|
65
|
+
tsv = TSV.setup({}, :key_field => key_field, :fields => ["Entities"], :type => :flat)
|
66
|
+
Dir.glob("output/*.txt").each do |file|
|
67
|
+
name = File.basename(file).sub(".txt",'')
|
68
|
+
entities = Open.read(file).split("\n")[1..-1].collect{|l| l.gsub(':', '.').split("\t")[1..-1] * ":"}
|
69
|
+
tsv[name] = entities
|
70
|
+
end
|
71
|
+
tsv
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
if __FILE__ == $0
|
78
|
+
Log.severity = 0
|
79
|
+
Rbbt.software.opt.GNormPlus.produce
|
80
|
+
end
|
data/lib/rbbt/ner/linnaeus.rb
CHANGED
@@ -8,8 +8,8 @@ module Linnaeus
|
|
8
8
|
|
9
9
|
ARGS = ["--properties", Rbbt.software.opt.Linnaeus["species-proxy/properties.conf"].find]
|
10
10
|
|
11
|
-
Rjb::load(nil, jvmargs = ['-Xms2G','-Xmx2G']) unless Rjb.loaded?
|
12
11
|
|
12
|
+
Rjb::load(nil, jvmargs = ['-Xms2G','-Xmx2G']) unless Rjb.loaded?
|
13
13
|
def self.init
|
14
14
|
begin
|
15
15
|
@@ArgParser = Rjb::import('martin.common.ArgParser')
|
data/lib/rbbt/ner/segment.rb
CHANGED
@@ -72,6 +72,17 @@ module Segment
|
|
72
72
|
(segment.offset.to_i + segment.segment_length.to_i <= self.offset.to_i + self.segment_length.to_i)
|
73
73
|
end
|
74
74
|
|
75
|
+
def overlaps?(segment)
|
76
|
+
segment.offset.to_i >= self.offset.to_i && segment.offset.to_i <= self.end ||
|
77
|
+
self.offset.to_i >= segment.offset.to_i && self.offset.to_i <= segment.end
|
78
|
+
end
|
79
|
+
|
80
|
+
def self.collisions(main, secondary)
|
81
|
+
collisions = secondary.select do |ss|
|
82
|
+
collisions = main.select{|ms| ms.overlaps? ss }.any?
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
75
86
|
#{{{ Sorting
|
76
87
|
|
77
88
|
def self.sort(segments, inline = true)
|
@@ -84,14 +95,14 @@ module Segment
|
|
84
95
|
-1
|
85
96
|
when (b.nil? or b.offset.nil?)
|
86
97
|
+1
|
87
|
-
when (not a.range.include? b.offset and not b.range.include? a.offset)
|
88
|
-
a.offset <=> b.offset
|
98
|
+
when (not a.range.include? b.offset.to_i and not b.range.include? a.offset.to_i)
|
99
|
+
a.offset.to_i <=> b.offset.to_i
|
89
100
|
else
|
90
101
|
a.segment_length <=> b.segment_length
|
91
102
|
end
|
92
103
|
end
|
93
104
|
else
|
94
|
-
segments.sort_by do |segment| segment.offset || 0 end.reverse
|
105
|
+
segments.sort_by do |segment| segment.offset.to_i || 0 end.reverse
|
95
106
|
end
|
96
107
|
end
|
97
108
|
|
@@ -282,7 +293,7 @@ module Segment
|
|
282
293
|
|
283
294
|
info[:annotation_types] = [Segment] unless info.include? :annotation_types
|
284
295
|
|
285
|
-
Annotated.
|
296
|
+
Annotated.load_entity(object, info)
|
286
297
|
end
|
287
298
|
|
288
299
|
def self.set_tsv_fields(fields, segments)
|
@@ -324,5 +335,16 @@ module Segment
|
|
324
335
|
end
|
325
336
|
end
|
326
337
|
|
338
|
+
def ansi(color)
|
339
|
+
Log.color color, self
|
340
|
+
end
|
341
|
+
|
342
|
+
def locus
|
343
|
+
[offset, self.end] * ".."
|
344
|
+
end
|
345
|
+
|
346
|
+
def ==(other)
|
347
|
+
self.id == other.id
|
348
|
+
end
|
327
349
|
end
|
328
350
|
|
@@ -2,19 +2,23 @@ require 'rbbt/ner/segment'
|
|
2
2
|
|
3
3
|
module Relationship
|
4
4
|
extend Annotation
|
5
|
-
|
5
|
+
self.annotation :segment
|
6
6
|
self.annotation :terms
|
7
|
+
self.annotation :type
|
8
|
+
|
9
|
+
def text
|
10
|
+
if segment
|
11
|
+
segment
|
12
|
+
else
|
13
|
+
type + ": " + terms * ", "
|
14
|
+
end
|
15
|
+
end
|
7
16
|
|
8
17
|
def html
|
9
18
|
text = <<-EOF
|
10
19
|
<span class='Relationship'\
|
11
|
-
>#{ self }</span>
|
20
|
+
>#{ self.text }</span>
|
12
21
|
EOF
|
13
22
|
text.chomp
|
14
23
|
end
|
15
|
-
|
16
|
-
def html_with_entities(*types)
|
17
|
-
annotations.values_at(*types).each do |segments|
|
18
|
-
end
|
19
|
-
end
|
20
24
|
end
|
@@ -6,7 +6,7 @@ module Transformed
|
|
6
6
|
def self.transform(text, segments, replacement = nil, &block)
|
7
7
|
|
8
8
|
text.extend Transformed
|
9
|
-
text.
|
9
|
+
text.replace_segments(segments, replacement, &block)
|
10
10
|
|
11
11
|
text
|
12
12
|
end
|
@@ -14,11 +14,11 @@ module Transformed
|
|
14
14
|
def self.with_transform(text, segments, replacement = nil)
|
15
15
|
|
16
16
|
text.extend Transformed
|
17
|
-
text.
|
17
|
+
text.replace_segments(segments, replacement)
|
18
18
|
|
19
19
|
segments = yield text
|
20
20
|
|
21
|
-
segments = nil unless Array === segments
|
21
|
+
segments = nil unless Array === segments && Segment === segments.first
|
22
22
|
|
23
23
|
text.restore(segments, true)
|
24
24
|
end
|
@@ -59,39 +59,41 @@ module Transformed
|
|
59
59
|
[begin_shift, end_shift]
|
60
60
|
end
|
61
61
|
|
62
|
-
def self.sort(segments)
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
end
|
88
|
-
|
89
|
-
def
|
62
|
+
#def self.sort(segments)
|
63
|
+
# segments.compact.sort do |a,b|
|
64
|
+
# case
|
65
|
+
# when ((a.nil? && b.nil?) || (a.offset.nil? && b.offset.nil?))
|
66
|
+
# 0
|
67
|
+
# when (a.nil? || a.offset.nil?)
|
68
|
+
# -1
|
69
|
+
# when (b.nil? || b.offset.nil?)
|
70
|
+
# +1
|
71
|
+
# # Non-overlap
|
72
|
+
# when (a.end < b.offset.to_i || b.end < a.offset.to_i)
|
73
|
+
# b.offset <=> a.offset
|
74
|
+
# # b includes a
|
75
|
+
# when (a.offset.to_i >= b.offset.to_i && a.end <= b.end)
|
76
|
+
# -1
|
77
|
+
# # b includes a
|
78
|
+
# when (b.offset.to_i >= a.offset.to_i && b.end <= a.end)
|
79
|
+
# +1
|
80
|
+
# # Overlap
|
81
|
+
# when (a.offset.to_i > b.offset.to_i && a.end > b.end || b.offset.to_i > a.offset.to_i && b.end > a.end)
|
82
|
+
# b.length <=> a.length
|
83
|
+
# else
|
84
|
+
# raise "Unexpected case in sort: #{a.range} - #{b.range}"
|
85
|
+
# end
|
86
|
+
# end
|
87
|
+
#end
|
88
|
+
|
89
|
+
def replace_segments(segments, replacement = nil, &block)
|
90
90
|
@transformed_segments ||= {}
|
91
91
|
@transformation_stack ||= []
|
92
92
|
stack = []
|
93
93
|
|
94
|
-
|
94
|
+
segments = [segments] unless Array === segments
|
95
|
+
orig_length = self.length
|
96
|
+
Segment.sort(segments).each do |segment|
|
95
97
|
next if segment.offset.nil?
|
96
98
|
shift = shift segment.range
|
97
99
|
|
@@ -106,6 +108,10 @@ module Transformed
|
|
106
108
|
updated_range = (updated_begin..updated_end)
|
107
109
|
|
108
110
|
updated_text = self[updated_begin..updated_end]
|
111
|
+
if updated_text.nil?
|
112
|
+
Log.warn "Range outside of segment: #{self.length} #{segment.locus} (#{updated_range})"
|
113
|
+
next
|
114
|
+
end
|
109
115
|
|
110
116
|
original_text = segment.dup
|
111
117
|
segment.replace updated_text
|
@@ -137,7 +143,7 @@ module Transformed
|
|
137
143
|
when segment.end < range.begin
|
138
144
|
# After
|
139
145
|
when segment.offset.to_i > range.end + diff
|
140
|
-
segment.offset.to_i
|
146
|
+
segment.offset = segment.offset.to_i - diff
|
141
147
|
# Includes
|
142
148
|
when (segment.offset.to_i <= range.begin and segment.end >= range.end + diff)
|
143
149
|
segment.replace self[segment.offset.to_i..segment.end - diff]
|
@@ -170,4 +176,9 @@ module Transformed
|
|
170
176
|
segments
|
171
177
|
end
|
172
178
|
end
|
179
|
+
|
180
|
+
def self.ansi(text, entities, colors = nil)
|
181
|
+
|
182
|
+
|
183
|
+
end
|
173
184
|
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'rbbt/nlp/nlp'
|
2
2
|
require 'rbbt/ner/segment'
|
3
3
|
module NLP
|
4
|
+
Rbbt.claim Rbbt.software.opt.Geniass, :install, Rbbt.share.install.software.Geniass.find
|
5
|
+
|
4
6
|
def self.returnFeatures(prevWord, delimiter, nextWord)
|
5
7
|
if nextWord.match(/__ss__/)
|
6
8
|
nw = nextWord.sub(/__ss__/, "")
|
@@ -235,4 +237,67 @@ module NLP
|
|
235
237
|
end
|
236
238
|
|
237
239
|
end
|
240
|
+
|
241
|
+
def self.geniass_sentence_splitter(text)
|
242
|
+
offsets = []
|
243
|
+
|
244
|
+
cleaned = text.gsub("\n",NEW_LINE_MASK)
|
245
|
+
TmpFile.with_file(cleaned) do |fin|
|
246
|
+
TmpFile.with_file do |fout|
|
247
|
+
CMD.cmd("cd #{Rbbt.software.opt.Geniass.find}; ./geniass #{ fin } #{ fout }")
|
248
|
+
|
249
|
+
|
250
|
+
Open.write(fin, Open.read(fin).gsub(NEW_LINE_MASK, "\n"))
|
251
|
+
Open.write(fout, Open.read(fout).gsub("\n", '|').gsub(NEW_LINE_MASK, "\n"))
|
252
|
+
# Addapted from sentence2standOff.rb in Geniass package
|
253
|
+
|
254
|
+
inTxtStrict = Open.open(fin)
|
255
|
+
inTxtNew = Open.open(fout)
|
256
|
+
|
257
|
+
marker = "|"[0]
|
258
|
+
position = 0
|
259
|
+
sentenceCount = 1
|
260
|
+
target = ''
|
261
|
+
targetNew = ''
|
262
|
+
start = 0
|
263
|
+
finish = 0
|
264
|
+
|
265
|
+
while(!inTxtNew.eof?) do
|
266
|
+
targetNew = inTxtNew.getc
|
267
|
+
target = inTxtStrict.getc
|
268
|
+
position += 1
|
269
|
+
if targetNew == marker
|
270
|
+
sentenceCount += 1
|
271
|
+
finish = position - 1
|
272
|
+
offsets << [start, finish] if finish - start > 10
|
273
|
+
if targetNew == target
|
274
|
+
start = position
|
275
|
+
else
|
276
|
+
targetNew = inTxtNew.getc
|
277
|
+
while targetNew != target do
|
278
|
+
target = inTxtStrict.getc
|
279
|
+
position += 1
|
280
|
+
end
|
281
|
+
start = position - 1
|
282
|
+
end
|
283
|
+
end
|
284
|
+
end
|
285
|
+
|
286
|
+
finish = position - 1
|
287
|
+
offsets << [start, finish] if finish > start
|
288
|
+
|
289
|
+
inTxtStrict.close
|
290
|
+
inTxtNew.close
|
291
|
+
end
|
292
|
+
end
|
293
|
+
|
294
|
+
offsets.collect do |s,e|
|
295
|
+
sentence = text[s..e]
|
296
|
+
next if sentence.nil?
|
297
|
+
#sentence.gsub!(NEW_LINE_MASK, "\n")
|
298
|
+
Segment.setup sentence, s
|
299
|
+
sentence
|
300
|
+
end
|
301
|
+
end
|
302
|
+
|
238
303
|
end
|
data/lib/rbbt/nlp/nlp.rb
CHANGED
@@ -16,76 +16,10 @@ module NLP
|
|
16
16
|
#Rbbt.software.opt.StanfordParser.define_as_install Rbbt.share.install.software.StanfordParser.find
|
17
17
|
#Rbbt.software.opt.StanfordParser.produce
|
18
18
|
|
19
|
-
Rbbt.claim Rbbt.software.opt.Geniass, :install, Rbbt.share.install.software.Geniass.find
|
20
|
-
Rbbt.software.opt.Geniass.produce
|
21
|
-
|
22
19
|
Rbbt.claim Rbbt.software.opt.Gdep, :install, Rbbt.share.install.software.Gdep.find
|
23
|
-
Rbbt.software.opt.Gdep.produce
|
24
20
|
|
25
21
|
NEW_LINE_MASK = "\t\t \t \t"
|
26
22
|
|
27
|
-
def self.geniass_sentence_splitter(text)
|
28
|
-
offsets = []
|
29
|
-
|
30
|
-
cleaned = text.gsub("\n",NEW_LINE_MASK)
|
31
|
-
TmpFile.with_file(cleaned) do |fin|
|
32
|
-
TmpFile.with_file do |fout|
|
33
|
-
CMD.cmd("cd #{Rbbt.software.opt.Geniass.find}; ./geniass #{ fin } #{ fout }")
|
34
|
-
|
35
|
-
|
36
|
-
Open.write(fin, Open.read(fin).gsub(NEW_LINE_MASK, "\n"))
|
37
|
-
Open.write(fout, Open.read(fout).gsub("\n", '|').gsub(NEW_LINE_MASK, "\n"))
|
38
|
-
# Addapted from sentence2standOff.rb in Geniass package
|
39
|
-
|
40
|
-
inTxtStrict = Open.open(fin)
|
41
|
-
inTxtNew = Open.open(fout)
|
42
|
-
|
43
|
-
marker = "|"[0]
|
44
|
-
position = 0
|
45
|
-
sentenceCount = 1
|
46
|
-
target = ''
|
47
|
-
targetNew = ''
|
48
|
-
start = 0
|
49
|
-
finish = 0
|
50
|
-
|
51
|
-
while(!inTxtNew.eof?) do
|
52
|
-
targetNew = inTxtNew.getc
|
53
|
-
target = inTxtStrict.getc
|
54
|
-
position += 1
|
55
|
-
if targetNew == marker
|
56
|
-
sentenceCount += 1
|
57
|
-
finish = position - 1
|
58
|
-
offsets << [start, finish] if finish - start > 10
|
59
|
-
if targetNew == target
|
60
|
-
start = position
|
61
|
-
else
|
62
|
-
targetNew = inTxtNew.getc
|
63
|
-
while targetNew != target do
|
64
|
-
target = inTxtStrict.getc
|
65
|
-
position += 1
|
66
|
-
end
|
67
|
-
start = position - 1
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
finish = position - 1
|
73
|
-
offsets << [start, finish] if finish > start
|
74
|
-
|
75
|
-
inTxtStrict.close
|
76
|
-
inTxtNew.close
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
offsets.collect do |s,e|
|
81
|
-
sentence = text[s..e]
|
82
|
-
next if sentence.nil?
|
83
|
-
#sentence.gsub!(NEW_LINE_MASK, "\n")
|
84
|
-
Segment.setup sentence, s
|
85
|
-
sentence
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
23
|
module GdepToken
|
90
24
|
extend Annotation
|
91
25
|
include Segment
|
@@ -219,3 +153,8 @@ module NLP
|
|
219
153
|
end
|
220
154
|
end
|
221
155
|
end
|
156
|
+
|
157
|
+
if __FILE__ == $0
|
158
|
+
Log.severity = 0
|
159
|
+
Rbbt.software.opt.Gdep.produce
|
160
|
+
end
|
@@ -6,16 +6,20 @@ require 'rbbt/resource'
|
|
6
6
|
module OpenNLP
|
7
7
|
Rbbt.claim Rbbt.software.opt.OpenNLP, :install, Rbbt.share.install.software.OpenNLP.find
|
8
8
|
|
9
|
+
|
9
10
|
Rbbt.claim Rbbt.software.opt.OpenNLP.models["da-sent.bin"], :url, "http://opennlp.sourceforge.net/models-1.5/de-sent.bin"
|
10
11
|
|
11
12
|
MAX = 5
|
12
13
|
|
13
|
-
@@FileInputStream = Rjb::import('java.io.FileInputStream')
|
14
|
-
@@SentenceModel = Rjb::import('opennlp.tools.sentdetect.SentenceModel')
|
15
|
-
@@SentenceDetectorME = Rjb::import('opennlp.tools.sentdetect.SentenceDetectorME')
|
16
|
-
|
17
14
|
def self.sentence_split_detector
|
18
15
|
@@sentence_split_detector ||= begin
|
16
|
+
Rbbt.software.opt.OpenNLP.produce
|
17
|
+
Rbbt.software.opt.OpenNLP.models["da-sent.bin"].produce
|
18
|
+
|
19
|
+
@@FileInputStream = Rjb::import('java.io.FileInputStream')
|
20
|
+
@@SentenceModel = Rjb::import('opennlp.tools.sentdetect.SentenceModel')
|
21
|
+
@@SentenceDetectorME = Rjb::import('opennlp.tools.sentdetect.SentenceDetectorME')
|
22
|
+
|
19
23
|
modelIn = @@FileInputStream.new(Rbbt.software.opt.OpenNLP.models["da-sent.bin"].produce.find);
|
20
24
|
|
21
25
|
model = @@SentenceModel.new(modelIn);
|
data/share/install/software/Gdep
CHANGED
@@ -6,7 +6,7 @@ class TestClass < Test::Unit::TestCase
|
|
6
6
|
def test_info
|
7
7
|
a = ["test"]
|
8
8
|
NamedEntity.setup a
|
9
|
-
assert(
|
9
|
+
assert(a.info[:code].nil?)
|
10
10
|
a.code = 10
|
11
11
|
a.offset = 100
|
12
12
|
assert a.info.include? :code
|
@@ -26,4 +26,27 @@ class TestClass < Test::Unit::TestCase
|
|
26
26
|
assert Segment.tsv([a], nil).fields.include? "code"
|
27
27
|
assert Segment.tsv([a], "literal").fields.include? "code"
|
28
28
|
end
|
29
|
+
|
30
|
+
def test_segment_brat
|
31
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
32
|
+
|
33
|
+
gene1 = "TP53"
|
34
|
+
gene1.extend NamedEntity
|
35
|
+
gene1.offset = a.index gene1
|
36
|
+
gene1.type = "Gene"
|
37
|
+
|
38
|
+
gene2 = "CDK5R1"
|
39
|
+
gene2.extend NamedEntity
|
40
|
+
gene2.offset = a.index gene2
|
41
|
+
gene2.type = "Gene"
|
42
|
+
|
43
|
+
gene3 = "TP53 gene"
|
44
|
+
gene3.extend NamedEntity
|
45
|
+
gene3.offset = a.index gene3
|
46
|
+
gene3.type = "Gene"
|
47
|
+
|
48
|
+
segments = [gene1, gene2, gene3]
|
49
|
+
assert segments.collect{|s| s.to_brat}.include? "Gene 27 35"
|
50
|
+
|
51
|
+
end
|
29
52
|
end
|
File without changes
|
@@ -2,10 +2,23 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_he
|
|
2
2
|
require 'rbbt/ner/segment/transformed'
|
3
3
|
require 'rbbt/ner/segment/named_entity'
|
4
4
|
require 'rexml/document'
|
5
|
-
require 'rand'
|
6
5
|
|
7
6
|
class TestClass < Test::Unit::TestCase
|
8
|
-
def
|
7
|
+
def test_sort
|
8
|
+
text = <<-EOF
|
9
|
+
More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
|
10
|
+
EOF
|
11
|
+
|
12
|
+
entities = ["PPAR", "IL-2", "IL-6", "IL-8", "TNF alpha", "NF-kappa B", "AP-1", "STAT"].reverse.collect do |literal|
|
13
|
+
NamedEntity.setup(literal, :offset => text.index(literal))
|
14
|
+
end
|
15
|
+
|
16
|
+
Transformed.with_transform(text, entities, Proc.new{|e| "[" + e.upcase + "]" }) do
|
17
|
+
assert text.include? "such as [IL-2]"
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def ___test_transform
|
9
22
|
a = "This sentence mentions the TP53 gene and the CDK5 protein"
|
10
23
|
original = a.dup
|
11
24
|
|
@@ -27,6 +40,8 @@ class TestClass < Test::Unit::TestCase
|
|
27
40
|
c[gene1.range] = "GN"
|
28
41
|
assert_equal c, Transformed.transform(a,[gene1], "GN")
|
29
42
|
|
43
|
+
iii a.transformation_offset_differences
|
44
|
+
raise
|
30
45
|
assert_equal gene2.offset, a.transformation_offset_differences.first.first.first
|
31
46
|
assert_equal gene1.offset, a.transformation_offset_differences.last.first.first
|
32
47
|
|
@@ -216,5 +231,60 @@ class TestClass < Test::Unit::TestCase
|
|
216
231
|
end
|
217
232
|
end
|
218
233
|
end
|
234
|
+
|
235
|
+
def test_nested_transform
|
236
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
237
|
+
|
238
|
+
gene1 = "TP53"
|
239
|
+
gene1.extend NamedEntity
|
240
|
+
gene1.offset = a.index gene1
|
241
|
+
gene1.type = "Gene"
|
242
|
+
|
243
|
+
gene2 = "CDK5R1"
|
244
|
+
gene2.extend NamedEntity
|
245
|
+
gene2.offset = a.index gene2
|
246
|
+
gene2.type = "Protein"
|
247
|
+
|
248
|
+
Transformed.with_transform(a, [gene1,gene2], "[G]") do
|
249
|
+
assert_equal "This sentence mentions the [G] gene and the [G] protein", a
|
250
|
+
end
|
251
|
+
Transformed.with_transform(a, [gene1], "[G1]") do
|
252
|
+
Transformed.with_transform(a, [gene2], "[G2]") do
|
253
|
+
assert_equal "This sentence mentions the [G1] gene and the [G2] protein", a
|
254
|
+
end
|
255
|
+
end
|
256
|
+
Transformed.with_transform(a, [gene2], "[G2]") do
|
257
|
+
Transformed.with_transform(a, [gene1], "[G1]") do
|
258
|
+
assert_equal "This sentence mentions the [G1] gene and the [G2] protein", a
|
259
|
+
end
|
260
|
+
end
|
261
|
+
end
|
262
|
+
|
263
|
+
def test_offset_transform
|
264
|
+
a = "ILF can bind to purine-rich regulatory motifs such as the human T-cell leukemia virus-long terminal region and the interleukin-2 promoter."
|
265
|
+
|
266
|
+
gene1 = "ILF"
|
267
|
+
gene1.extend NamedEntity
|
268
|
+
gene1.offset = a.index gene1
|
269
|
+
gene1.type = "Gene"
|
270
|
+
|
271
|
+
gene2 = "interleukin-2"
|
272
|
+
gene2.extend NamedEntity
|
273
|
+
gene2.offset = a.index gene2
|
274
|
+
gene2.type = "Protein"
|
275
|
+
|
276
|
+
Transformed.with_transform(a, [gene1,gene2], "[G]") do
|
277
|
+
assert_equal "[G] can bind to purine-rich regulatory motifs such as the human T-cell leukemia virus-long terminal region and the [G] promoter.", a
|
278
|
+
end
|
279
|
+
|
280
|
+
offset = 100
|
281
|
+
a = Segment.setup(a, :offset => offset)
|
282
|
+
gene1.offset += offset
|
283
|
+
gene2.offset += offset
|
284
|
+
Transformed.with_transform(a, [gene1,gene2], "[G]") do
|
285
|
+
assert_equal "[G] can bind to purine-rich regulatory motifs such as the human T-cell leukemia virus-long terminal region and the [G] promoter.", a
|
286
|
+
end
|
287
|
+
|
288
|
+
end
|
219
289
|
end
|
220
290
|
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/brat'
|
3
|
+
|
4
|
+
class TestBrat < Test::Unit::TestCase
|
5
|
+
def test_load
|
6
|
+
text =<<-EOF
|
7
|
+
T2 DBTF 52 55 Nrl
|
8
|
+
#2 AnnotatorNotes T2 4901
|
9
|
+
T3 NONDBTF 80 89 rhodopsin
|
10
|
+
#3 AnnotatorNotes T3 6010
|
11
|
+
T4 BIOLOGICALPROCESS 90 105 gene expression
|
12
|
+
#4 AnnotatorNotes T4 -
|
13
|
+
T5 DBTF 127 130 Nrl
|
14
|
+
#5 AnnotatorNotes T5 4901
|
15
|
+
T7 MOLECULARFUNCTION 197 204 binding
|
16
|
+
#7 AnnotatorNotes T7 -
|
17
|
+
T8 PHENOTYPE 241 252 extended AP
|
18
|
+
#8 AnnotatorNotes T8 -
|
19
|
+
T10 DBTF 331 334 Nrl
|
20
|
+
#10 AnnotatorNotes T10 4901
|
21
|
+
T11 TISSUE 381 399 photoreceptor cell
|
22
|
+
#11 AnnotatorNotes T11 -
|
23
|
+
T12 NONDBTF 414 423 rhodopsin
|
24
|
+
#12 AnnotatorNotes T12 6010
|
25
|
+
T13 CELLULARCOMPONENT 494 501 nuclear
|
26
|
+
#13 AnnotatorNotes T13 -
|
27
|
+
T14 TISSUE 548 572 retinoblastoma cell line
|
28
|
+
#14 AnnotatorNotes T14 -
|
29
|
+
T17 NONDBTF 660 669 rhodopsin
|
30
|
+
#17 AnnotatorNotes T17 6010
|
31
|
+
T18 DBTF 676 679 Nrl
|
32
|
+
#18 AnnotatorNotes T18 4901
|
33
|
+
T19 CELLULARCOMPONENT 749 764 protein complex
|
34
|
+
#19 AnnotatorNotes T19 -
|
35
|
+
T20 DBTF 797 800 Nrl
|
36
|
+
#20 AnnotatorNotes T20 4901
|
37
|
+
T21 DBTF 853 856 Nrl
|
38
|
+
#21 AnnotatorNotes T21 4901
|
39
|
+
T22 MOLECULARFUNCTION 882 892 luciferase
|
40
|
+
#22 AnnotatorNotes T22 -
|
41
|
+
T23 DBTF 943 946 Nrl
|
42
|
+
#23 AnnotatorNotes T23 4901
|
43
|
+
T24 NONDBTF 989 998 rhodopsin
|
44
|
+
#24 AnnotatorNotes T24 6010
|
45
|
+
T26 DBTF 1110 1113 Nrl
|
46
|
+
#26 AnnotatorNotes T26 4901
|
47
|
+
T27 DBTF 1224 1227 Nrl
|
48
|
+
#27 AnnotatorNotes T27 4901
|
49
|
+
T28 DBTF 1271 1274 Nrl
|
50
|
+
#28 AnnotatorNotes T28 4901
|
51
|
+
T30 DBTF 1385 1388 Nrl
|
52
|
+
#30 AnnotatorNotes T30 4901
|
53
|
+
R1 ACTIVATION Arg1:T2 Arg2:T3
|
54
|
+
R2 ACTIVATION Arg1:T10 Arg2:T12
|
55
|
+
R3 ACTIVATION Arg1:T23 Arg2:T24
|
56
|
+
T1 DBTF 250 254 AP-1
|
57
|
+
EOF
|
58
|
+
|
59
|
+
io = StringIO.new text
|
60
|
+
iii Brat.load io
|
61
|
+
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/g_norm_plus'
|
3
|
+
|
4
|
+
Log.severity = 0
|
5
|
+
class TestGNormPlus < Test::Unit::TestCase
|
6
|
+
def test_match
|
7
|
+
text =<<-EOF
|
8
|
+
We found that TP53 is regulated by MDM2 in Homo sapiens
|
9
|
+
EOF
|
10
|
+
|
11
|
+
|
12
|
+
mentions = GNormPlus.process({:file => text})
|
13
|
+
Log.tsv mentions
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
@@ -29,11 +29,14 @@ sentence. This is
|
|
29
29
|
another sentence.
|
30
30
|
EOF
|
31
31
|
|
32
|
+
iii OpenNLP.sentence_split_detector.sentDetect(text)
|
33
|
+
assert_equal 5, OpenNLP.sentence_split_detector.sentDetect(text).length
|
34
|
+
|
32
35
|
assert_equal 5, OpenNLP.sentence_splitter(text).length
|
33
36
|
assert_equal "This is a \nsentence.", OpenNLP.sentence_splitter(text)[3]
|
34
37
|
end
|
35
38
|
|
36
|
-
def
|
39
|
+
def _test_text_sentences
|
37
40
|
Misc.benchmark(100) do
|
38
41
|
OpenNLP.sentence_splitter($text).include? "Our
|
39
42
|
findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-01-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -66,20 +66,6 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: rjb
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '0'
|
76
|
-
type: :runtime
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '0'
|
83
69
|
description: 'Text mining tools: named entity recognition and normalization, document
|
84
70
|
classification, bag-of-words, dictionaries, etc'
|
85
71
|
email: miguel.vazquez@fdi.ucm.es
|
@@ -100,8 +86,10 @@ files:
|
|
100
86
|
- lib/rbbt/ner/NER.rb
|
101
87
|
- lib/rbbt/ner/abner.rb
|
102
88
|
- lib/rbbt/ner/banner.rb
|
89
|
+
- lib/rbbt/ner/brat.rb
|
103
90
|
- lib/rbbt/ner/chemical_tagger.rb
|
104
91
|
- lib/rbbt/ner/finder.rb
|
92
|
+
- lib/rbbt/ner/g_norm_plus.rb
|
105
93
|
- lib/rbbt/ner/linnaeus.rb
|
106
94
|
- lib/rbbt/ner/ngram_prefix_dictionary.rb
|
107
95
|
- lib/rbbt/ner/oscar3.rb
|
@@ -125,6 +113,7 @@ files:
|
|
125
113
|
- share/install/software/ABNER
|
126
114
|
- share/install/software/BANNER
|
127
115
|
- share/install/software/ChemicalTagger
|
116
|
+
- share/install/software/GNormPlus
|
128
117
|
- share/install/software/Gdep
|
129
118
|
- share/install/software/Geniass
|
130
119
|
- share/install/software/Linnaeus
|
@@ -141,13 +130,16 @@ files:
|
|
141
130
|
- test/rbbt/bow/test_misc.rb
|
142
131
|
- test/rbbt/entity/test_document.rb
|
143
132
|
- test/rbbt/ner/segment/test_named_entity.rb
|
133
|
+
- test/rbbt/ner/segment/test_relationship.rb
|
144
134
|
- test/rbbt/ner/segment/test_segmented.rb
|
145
135
|
- test/rbbt/ner/segment/test_transformed.rb
|
146
136
|
- test/rbbt/ner/test_NER.rb
|
147
137
|
- test/rbbt/ner/test_abner.rb
|
148
138
|
- test/rbbt/ner/test_banner.rb
|
139
|
+
- test/rbbt/ner/test_brat.rb
|
149
140
|
- test/rbbt/ner/test_chemical_tagger.rb
|
150
141
|
- test/rbbt/ner/test_finder.rb
|
142
|
+
- test/rbbt/ner/test_g_norm_plus.rb
|
151
143
|
- test/rbbt/ner/test_linnaeus.rb
|
152
144
|
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
153
145
|
- test/rbbt/ner/test_oscar4.rb
|
@@ -156,6 +148,7 @@ files:
|
|
156
148
|
- test/rbbt/ner/test_rnorm.rb
|
157
149
|
- test/rbbt/ner/test_segment.rb
|
158
150
|
- test/rbbt/ner/test_token_trieNER.rb
|
151
|
+
- test/rbbt/nlp/genia/test_sentence_splitter.rb
|
159
152
|
- test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
|
160
153
|
- test/rbbt/nlp/test_nlp.rb
|
161
154
|
- test/test_helper.rb
|
@@ -177,14 +170,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
177
170
|
- !ruby/object:Gem::Version
|
178
171
|
version: '0'
|
179
172
|
requirements: []
|
180
|
-
|
181
|
-
rubygems_version: 2.6.13
|
173
|
+
rubygems_version: 3.0.6
|
182
174
|
signing_key:
|
183
175
|
specification_version: 4
|
184
176
|
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|
185
177
|
test_files:
|
186
178
|
- test/rbbt/nlp/test_nlp.rb
|
187
179
|
- test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
|
180
|
+
- test/rbbt/nlp/genia/test_sentence_splitter.rb
|
188
181
|
- test/rbbt/bow/test_bow.rb
|
189
182
|
- test/rbbt/bow/test_misc.rb
|
190
183
|
- test/rbbt/bow/test_dictionary.rb
|
@@ -195,6 +188,8 @@ test_files:
|
|
195
188
|
- test/rbbt/ner/test_rnorm.rb
|
196
189
|
- test/rbbt/ner/test_regexpNER.rb
|
197
190
|
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
191
|
+
- test/rbbt/ner/test_brat.rb
|
192
|
+
- test/rbbt/ner/test_g_norm_plus.rb
|
198
193
|
- test/rbbt/ner/test_chemical_tagger.rb
|
199
194
|
- test/rbbt/ner/test_banner.rb
|
200
195
|
- test/rbbt/ner/test_token_trieNER.rb
|
@@ -202,6 +197,7 @@ test_files:
|
|
202
197
|
- test/rbbt/ner/test_segment.rb
|
203
198
|
- test/rbbt/ner/test_linnaeus.rb
|
204
199
|
- test/rbbt/ner/segment/test_transformed.rb
|
200
|
+
- test/rbbt/ner/segment/test_relationship.rb
|
205
201
|
- test/rbbt/ner/segment/test_named_entity.rb
|
206
202
|
- test/rbbt/ner/segment/test_segmented.rb
|
207
203
|
- test/rbbt/ner/test_oscar4.rb
|