rbbt-text 1.1.7 → 1.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/bin/get_ppis.rb +5 -5
- data/lib/rbbt/bow/dictionary.rb +0 -3
- data/lib/rbbt/corpus/document.rb +3 -3
- data/lib/rbbt/corpus/sources/pubmed.rb +2 -1
- data/lib/rbbt/ner/abner.rb +1 -0
- data/lib/rbbt/ner/banner.rb +1 -0
- data/lib/rbbt/ner/brat.rb +30 -0
- data/lib/rbbt/ner/g_norm_plus.rb +80 -0
- data/lib/rbbt/ner/linnaeus.rb +1 -1
- data/lib/rbbt/ner/segment.rb +26 -4
- data/lib/rbbt/ner/segment/named_entity.rb +1 -0
- data/lib/rbbt/ner/segment/relationship.rb +11 -7
- data/lib/rbbt/ner/segment/transformed.rb +44 -33
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +65 -0
- data/lib/rbbt/nlp/nlp.rb +5 -66
- data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +8 -4
- data/share/install/software/GNormPlus +7 -0
- data/share/install/software/Gdep +1 -1
- data/share/install/software/OpenNLP +1 -1
- data/test/rbbt/ner/segment/test_named_entity.rb +24 -1
- data/test/rbbt/ner/segment/test_relationship.rb +0 -0
- data/test/rbbt/ner/segment/test_transformed.rb +72 -2
- data/test/rbbt/ner/test_brat.rb +64 -0
- data/test/rbbt/ner/test_g_norm_plus.rb +16 -0
- data/test/rbbt/ner/test_segment.rb +0 -1
- data/test/rbbt/nlp/genia/test_sentence_splitter.rb +9 -0
- data/test/rbbt/nlp/open_nlp/test_sentence_splitter.rb +4 -1
- metadata +14 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: ea1646b5f32644bb5872f57422534b49955f988df26df4a65c8dda592515eac3
|
4
|
+
data.tar.gz: 3f6bc60546b79c76b6b35840712453616c377fcc088f321e95847f116776bef1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9376c68bad67733b5771b57ead7c962d45ff29c44362d1c51bf3480d3c3bf9f1f75284e40044fc4ed95bd94a03ab0759b3b7320bf1e3da00a0cdd82255c9395c
|
7
|
+
data.tar.gz: cd25a9cd91fde366be195801d45238d555edfc94f2b06391db7db2d9f4781b34dd599514385782d6c7e22af2841c5f3322ba74bf0a3a9c1fdbe308a255f00098
|
data/bin/get_ppis.rb
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require 'rbbt-util'
|
4
|
-
require 'rbbt/
|
5
|
-
require 'rbbt/
|
6
|
-
require 'rbbt/annotations/relationships/ppi'
|
4
|
+
require 'rbbt/corpus/corpus'
|
5
|
+
require 'rbbt/corpus/sources/pubmed'
|
6
|
+
#require 'rbbt/annotations/relationships/ppi'
|
7
7
|
require 'rbbt/sources/pubmed'
|
8
|
-
require 'rbbt/ner/annotations'
|
8
|
+
#require 'rbbt/ner/annotations'
|
9
9
|
require 'rbbt/ner/token_trieNER'
|
10
|
-
require 'rbbt/ner/annotations/transformed'
|
10
|
+
#require 'rbbt/ner/annotations/transformed'
|
11
11
|
require 'rbbt/ner/chemical_tagger'
|
12
12
|
|
13
13
|
Corpus.define_entity_ner "Compounds", false do |doc|
|
data/lib/rbbt/bow/dictionary.rb
CHANGED
data/lib/rbbt/corpus/document.rb
CHANGED
@@ -8,10 +8,10 @@ require 'json'
|
|
8
8
|
|
9
9
|
class Document
|
10
10
|
|
11
|
-
attr_accessor :text, :docid, :namespace, :id, :type, :hash, :segments, :
|
11
|
+
attr_accessor :text, :docid, :namespace, :id, :type, :hash, :segments, :segment_indices, :persist_dir, :global_persistence
|
12
12
|
def initialize(persist_dir = nil, docid = nil, text = nil, global_persistence = nil)
|
13
13
|
@segments = {}
|
14
|
-
@
|
14
|
+
@segment_indices = {}
|
15
15
|
|
16
16
|
if not persist_dir.nil?
|
17
17
|
@persist_dir = persist_dir
|
@@ -236,7 +236,7 @@ class Document
|
|
236
236
|
end
|
237
237
|
|
238
238
|
def segment_index(name, persist_dir = nil)
|
239
|
-
@
|
239
|
+
@segment_indices[name] ||= Segment.index(self.send(name), persist_dir.nil? ? :memory : File.join(persist_dir, name + '.range'))
|
240
240
|
end
|
241
241
|
|
242
242
|
def load_into(segment, *annotations)
|
@@ -10,8 +10,9 @@ class Corpus
|
|
10
10
|
type = nil if String === type and type.empty?
|
11
11
|
|
12
12
|
PubMed.get_article(pmids).collect do |pmid, article|
|
13
|
+
add_document(article.title, :pubmed, pmid, :title)
|
13
14
|
if (type.nil? and article.pdf_url.nil?) or (not type.nil? and type.to_sym === :abstract)
|
14
|
-
add_document(article.
|
15
|
+
add_document(article.abstract || "", :pubmed, pmid, :abstract)
|
15
16
|
else
|
16
17
|
raise "No FullText available for #{ pmid }" if article.pdf_url.nil?
|
17
18
|
add_document(article.full_text, :pubmed, pmid, :fulltext)
|
data/lib/rbbt/ner/abner.rb
CHANGED
@@ -11,6 +11,7 @@ class Abner < NER
|
|
11
11
|
Rbbt.claim Rbbt.software.opt.ABNER, :install, Rbbt.share.install.software.ABNER.find
|
12
12
|
|
13
13
|
def self.init
|
14
|
+
Rbbt.software.opt.ABNER.produce
|
14
15
|
@@JFile ||= Rjb::import('java.io.File')
|
15
16
|
@@Tagger ||= Rjb::import('abner.Tagger')
|
16
17
|
@@Trainer ||= Rjb::import('abner.Trainer')
|
data/lib/rbbt/ner/banner.rb
CHANGED
@@ -10,6 +10,7 @@ class Banner < NER
|
|
10
10
|
Rbbt.claim Rbbt.software.opt.BANNER, :install, Rbbt.share.install.software.BANNER.find
|
11
11
|
|
12
12
|
def self.init
|
13
|
+
Rbbt.software.opt.BANNER.produce
|
13
14
|
@@JFile ||= Rjb::import('java.io.File')
|
14
15
|
@@SimpleTokenizer ||= Rjb::import('banner.tokenization.SimpleTokenizer')
|
15
16
|
@@CRFTagger ||= Rjb::import('banner.tagging.CRFTagger')
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'rbbt/ner/segment/named_entity'
|
2
|
+
require 'rbbt/ner/segment/relationship'
|
3
|
+
module Brat
|
4
|
+
Rbbt.claim Rbbt.software.opt.Brat, :install, "https://github.com/nlplab/brat.git"
|
5
|
+
|
6
|
+
def self.load(file)
|
7
|
+
entities = {}
|
8
|
+
relationships = {}
|
9
|
+
entity_ids = {}
|
10
|
+
TSV.traverse file, :type => :array do |line|
|
11
|
+
id, info, literal = line.split("\t")
|
12
|
+
case id[0]
|
13
|
+
when "T"
|
14
|
+
type, start, eend = info.split(" ")
|
15
|
+
entities[id] = NamedEntity.setup(literal, :offset => start.to_i, :type => type)
|
16
|
+
when "#"
|
17
|
+
type, id = info.split(" ")
|
18
|
+
entities[id].code = literal unless entities[id].nil?
|
19
|
+
when "R"
|
20
|
+
type, *args = info.split(" ")
|
21
|
+
tf, tg = args.collect{|e| e.split(":").last }
|
22
|
+
tf = entities[tf]
|
23
|
+
tg = entities[tg]
|
24
|
+
relationship = Relationship.setup([tf,tg] * "~" + "#" + type, :terms => [tf,tg], :type => type)
|
25
|
+
relationships[id] = relationship
|
26
|
+
end
|
27
|
+
end
|
28
|
+
[entities.values, relationships.values]
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require 'rbbt-util'
|
2
|
+
module GNormPlus
|
3
|
+
|
4
|
+
Rbbt.claim Rbbt.software.opt.GNormPlus, :install do
|
5
|
+
url = "https://www.ncbi.nlm.nih.gov/CBBresearch/Lu/Demo/tmTools/download/GNormPlus/GNormPlusJava.zip"
|
6
|
+
script =<<-EOF
|
7
|
+
(cd $(opt_dir $name); sh Installation.sh; chmod +x Ab3P identify_abbr)
|
8
|
+
EOF
|
9
|
+
{:src => url, :commands => script}
|
10
|
+
end
|
11
|
+
|
12
|
+
CONFIG =<<-EOF
|
13
|
+
|
14
|
+
#===Annotation
|
15
|
+
#Attribution setting:
|
16
|
+
#FocusSpecies = Taxonomy ID
|
17
|
+
# All: All species
|
18
|
+
# 9606: Human
|
19
|
+
# 4932: yeast
|
20
|
+
# 7227: Fly
|
21
|
+
# 10090: Mouse
|
22
|
+
# 10116: Rat
|
23
|
+
# 7955: Zebrafish
|
24
|
+
# 3702: Arabidopsis thaliana
|
25
|
+
#open: True
|
26
|
+
#close: False
|
27
|
+
|
28
|
+
[Focus Species]
|
29
|
+
FocusSpecies = All
|
30
|
+
[Dictionary & Model]
|
31
|
+
DictionaryFolder = ./Dictionary
|
32
|
+
GNRModel = ./Dictionary/GNR.Model
|
33
|
+
SCModel = ./Dictionary/SimConcept.Model
|
34
|
+
GeneIDMatch = True
|
35
|
+
Normalization2Protein = False
|
36
|
+
DeleteTmp = True
|
37
|
+
EOF
|
38
|
+
|
39
|
+
def self.process(texts)
|
40
|
+
TmpFile.with_file do |tmpdir|
|
41
|
+
Open.mkdir tmpdir
|
42
|
+
Misc.in_dir tmpdir do
|
43
|
+
Open.ln_s Rbbt.software.opt.GNormPlus.Dictionary.find, '.'
|
44
|
+
Open.ln_s Rbbt.software.opt.GNormPlus["BioC.dtd"].find, '.'
|
45
|
+
Open.ln_s Rbbt.software.opt.GNormPlus["Ab3P"].find, '.'
|
46
|
+
Open.ln_s Rbbt.software.opt.GNormPlus["CRF"].find, '.'
|
47
|
+
Open.mkdir 'input'
|
48
|
+
Open.mkdir 'output'
|
49
|
+
Open.mkdir 'tmp'
|
50
|
+
|
51
|
+
texts.each do |name,text|
|
52
|
+
Open.write("input/#{name}.txt") do |f|
|
53
|
+
f.puts "#{name}|a|" << text
|
54
|
+
f.puts
|
55
|
+
end
|
56
|
+
end
|
57
|
+
Open.write('config', CONFIG)
|
58
|
+
CMD.cmd_log("java -Xmx20G -Xms20G -jar '#{Rbbt.software.opt.GNormPlus.find}/GNormPlus.jar' 'input' 'output' 'config'")
|
59
|
+
|
60
|
+
if texts.respond_to? :key_field
|
61
|
+
key_field = texts.key_field
|
62
|
+
else
|
63
|
+
key_field = "ID"
|
64
|
+
end
|
65
|
+
tsv = TSV.setup({}, :key_field => key_field, :fields => ["Entities"], :type => :flat)
|
66
|
+
Dir.glob("output/*.txt").each do |file|
|
67
|
+
name = File.basename(file).sub(".txt",'')
|
68
|
+
entities = Open.read(file).split("\n")[1..-1].collect{|l| l.gsub(':', '.').split("\t")[1..-1] * ":"}
|
69
|
+
tsv[name] = entities
|
70
|
+
end
|
71
|
+
tsv
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
if __FILE__ == $0
|
78
|
+
Log.severity = 0
|
79
|
+
Rbbt.software.opt.GNormPlus.produce
|
80
|
+
end
|
data/lib/rbbt/ner/linnaeus.rb
CHANGED
@@ -8,8 +8,8 @@ module Linnaeus
|
|
8
8
|
|
9
9
|
ARGS = ["--properties", Rbbt.software.opt.Linnaeus["species-proxy/properties.conf"].find]
|
10
10
|
|
11
|
-
Rjb::load(nil, jvmargs = ['-Xms2G','-Xmx2G']) unless Rjb.loaded?
|
12
11
|
|
12
|
+
Rjb::load(nil, jvmargs = ['-Xms2G','-Xmx2G']) unless Rjb.loaded?
|
13
13
|
def self.init
|
14
14
|
begin
|
15
15
|
@@ArgParser = Rjb::import('martin.common.ArgParser')
|
data/lib/rbbt/ner/segment.rb
CHANGED
@@ -72,6 +72,17 @@ module Segment
|
|
72
72
|
(segment.offset.to_i + segment.segment_length.to_i <= self.offset.to_i + self.segment_length.to_i)
|
73
73
|
end
|
74
74
|
|
75
|
+
def overlaps?(segment)
|
76
|
+
segment.offset.to_i >= self.offset.to_i && segment.offset.to_i <= self.end ||
|
77
|
+
self.offset.to_i >= segment.offset.to_i && self.offset.to_i <= segment.end
|
78
|
+
end
|
79
|
+
|
80
|
+
def self.collisions(main, secondary)
|
81
|
+
collisions = secondary.select do |ss|
|
82
|
+
collisions = main.select{|ms| ms.overlaps? ss }.any?
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
75
86
|
#{{{ Sorting
|
76
87
|
|
77
88
|
def self.sort(segments, inline = true)
|
@@ -84,14 +95,14 @@ module Segment
|
|
84
95
|
-1
|
85
96
|
when (b.nil? or b.offset.nil?)
|
86
97
|
+1
|
87
|
-
when (not a.range.include? b.offset and not b.range.include? a.offset)
|
88
|
-
a.offset <=> b.offset
|
98
|
+
when (not a.range.include? b.offset.to_i and not b.range.include? a.offset.to_i)
|
99
|
+
a.offset.to_i <=> b.offset.to_i
|
89
100
|
else
|
90
101
|
a.segment_length <=> b.segment_length
|
91
102
|
end
|
92
103
|
end
|
93
104
|
else
|
94
|
-
segments.sort_by do |segment| segment.offset || 0 end.reverse
|
105
|
+
segments.sort_by do |segment| segment.offset.to_i || 0 end.reverse
|
95
106
|
end
|
96
107
|
end
|
97
108
|
|
@@ -282,7 +293,7 @@ module Segment
|
|
282
293
|
|
283
294
|
info[:annotation_types] = [Segment] unless info.include? :annotation_types
|
284
295
|
|
285
|
-
Annotated.
|
296
|
+
Annotated.load_entity(object, info)
|
286
297
|
end
|
287
298
|
|
288
299
|
def self.set_tsv_fields(fields, segments)
|
@@ -324,5 +335,16 @@ module Segment
|
|
324
335
|
end
|
325
336
|
end
|
326
337
|
|
338
|
+
def ansi(color)
|
339
|
+
Log.color color, self
|
340
|
+
end
|
341
|
+
|
342
|
+
def locus
|
343
|
+
[offset, self.end] * ".."
|
344
|
+
end
|
345
|
+
|
346
|
+
def ==(other)
|
347
|
+
self.id == other.id
|
348
|
+
end
|
327
349
|
end
|
328
350
|
|
@@ -2,19 +2,23 @@ require 'rbbt/ner/segment'
|
|
2
2
|
|
3
3
|
module Relationship
|
4
4
|
extend Annotation
|
5
|
-
|
5
|
+
self.annotation :segment
|
6
6
|
self.annotation :terms
|
7
|
+
self.annotation :type
|
8
|
+
|
9
|
+
def text
|
10
|
+
if segment
|
11
|
+
segment
|
12
|
+
else
|
13
|
+
type + ": " + terms * ", "
|
14
|
+
end
|
15
|
+
end
|
7
16
|
|
8
17
|
def html
|
9
18
|
text = <<-EOF
|
10
19
|
<span class='Relationship'\
|
11
|
-
>#{ self }</span>
|
20
|
+
>#{ self.text }</span>
|
12
21
|
EOF
|
13
22
|
text.chomp
|
14
23
|
end
|
15
|
-
|
16
|
-
def html_with_entities(*types)
|
17
|
-
annotations.values_at(*types).each do |segments|
|
18
|
-
end
|
19
|
-
end
|
20
24
|
end
|
@@ -6,7 +6,7 @@ module Transformed
|
|
6
6
|
def self.transform(text, segments, replacement = nil, &block)
|
7
7
|
|
8
8
|
text.extend Transformed
|
9
|
-
text.
|
9
|
+
text.replace_segments(segments, replacement, &block)
|
10
10
|
|
11
11
|
text
|
12
12
|
end
|
@@ -14,11 +14,11 @@ module Transformed
|
|
14
14
|
def self.with_transform(text, segments, replacement = nil)
|
15
15
|
|
16
16
|
text.extend Transformed
|
17
|
-
text.
|
17
|
+
text.replace_segments(segments, replacement)
|
18
18
|
|
19
19
|
segments = yield text
|
20
20
|
|
21
|
-
segments = nil unless Array === segments
|
21
|
+
segments = nil unless Array === segments && Segment === segments.first
|
22
22
|
|
23
23
|
text.restore(segments, true)
|
24
24
|
end
|
@@ -59,39 +59,41 @@ module Transformed
|
|
59
59
|
[begin_shift, end_shift]
|
60
60
|
end
|
61
61
|
|
62
|
-
def self.sort(segments)
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
end
|
88
|
-
|
89
|
-
def
|
62
|
+
#def self.sort(segments)
|
63
|
+
# segments.compact.sort do |a,b|
|
64
|
+
# case
|
65
|
+
# when ((a.nil? && b.nil?) || (a.offset.nil? && b.offset.nil?))
|
66
|
+
# 0
|
67
|
+
# when (a.nil? || a.offset.nil?)
|
68
|
+
# -1
|
69
|
+
# when (b.nil? || b.offset.nil?)
|
70
|
+
# +1
|
71
|
+
# # Non-overlap
|
72
|
+
# when (a.end < b.offset.to_i || b.end < a.offset.to_i)
|
73
|
+
# b.offset <=> a.offset
|
74
|
+
# # b includes a
|
75
|
+
# when (a.offset.to_i >= b.offset.to_i && a.end <= b.end)
|
76
|
+
# -1
|
77
|
+
# # b includes a
|
78
|
+
# when (b.offset.to_i >= a.offset.to_i && b.end <= a.end)
|
79
|
+
# +1
|
80
|
+
# # Overlap
|
81
|
+
# when (a.offset.to_i > b.offset.to_i && a.end > b.end || b.offset.to_i > a.offset.to_i && b.end > a.end)
|
82
|
+
# b.length <=> a.length
|
83
|
+
# else
|
84
|
+
# raise "Unexpected case in sort: #{a.range} - #{b.range}"
|
85
|
+
# end
|
86
|
+
# end
|
87
|
+
#end
|
88
|
+
|
89
|
+
def replace_segments(segments, replacement = nil, &block)
|
90
90
|
@transformed_segments ||= {}
|
91
91
|
@transformation_stack ||= []
|
92
92
|
stack = []
|
93
93
|
|
94
|
-
|
94
|
+
segments = [segments] unless Array === segments
|
95
|
+
orig_length = self.length
|
96
|
+
Segment.sort(segments).each do |segment|
|
95
97
|
next if segment.offset.nil?
|
96
98
|
shift = shift segment.range
|
97
99
|
|
@@ -106,6 +108,10 @@ module Transformed
|
|
106
108
|
updated_range = (updated_begin..updated_end)
|
107
109
|
|
108
110
|
updated_text = self[updated_begin..updated_end]
|
111
|
+
if updated_text.nil?
|
112
|
+
Log.warn "Range outside of segment: #{self.length} #{segment.locus} (#{updated_range})"
|
113
|
+
next
|
114
|
+
end
|
109
115
|
|
110
116
|
original_text = segment.dup
|
111
117
|
segment.replace updated_text
|
@@ -137,7 +143,7 @@ module Transformed
|
|
137
143
|
when segment.end < range.begin
|
138
144
|
# After
|
139
145
|
when segment.offset.to_i > range.end + diff
|
140
|
-
segment.offset.to_i
|
146
|
+
segment.offset = segment.offset.to_i - diff
|
141
147
|
# Includes
|
142
148
|
when (segment.offset.to_i <= range.begin and segment.end >= range.end + diff)
|
143
149
|
segment.replace self[segment.offset.to_i..segment.end - diff]
|
@@ -170,4 +176,9 @@ module Transformed
|
|
170
176
|
segments
|
171
177
|
end
|
172
178
|
end
|
179
|
+
|
180
|
+
def self.ansi(text, entities, colors = nil)
|
181
|
+
|
182
|
+
|
183
|
+
end
|
173
184
|
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'rbbt/nlp/nlp'
|
2
2
|
require 'rbbt/ner/segment'
|
3
3
|
module NLP
|
4
|
+
Rbbt.claim Rbbt.software.opt.Geniass, :install, Rbbt.share.install.software.Geniass.find
|
5
|
+
|
4
6
|
def self.returnFeatures(prevWord, delimiter, nextWord)
|
5
7
|
if nextWord.match(/__ss__/)
|
6
8
|
nw = nextWord.sub(/__ss__/, "")
|
@@ -235,4 +237,67 @@ module NLP
|
|
235
237
|
end
|
236
238
|
|
237
239
|
end
|
240
|
+
|
241
|
+
def self.geniass_sentence_splitter(text)
|
242
|
+
offsets = []
|
243
|
+
|
244
|
+
cleaned = text.gsub("\n",NEW_LINE_MASK)
|
245
|
+
TmpFile.with_file(cleaned) do |fin|
|
246
|
+
TmpFile.with_file do |fout|
|
247
|
+
CMD.cmd("cd #{Rbbt.software.opt.Geniass.find}; ./geniass #{ fin } #{ fout }")
|
248
|
+
|
249
|
+
|
250
|
+
Open.write(fin, Open.read(fin).gsub(NEW_LINE_MASK, "\n"))
|
251
|
+
Open.write(fout, Open.read(fout).gsub("\n", '|').gsub(NEW_LINE_MASK, "\n"))
|
252
|
+
# Addapted from sentence2standOff.rb in Geniass package
|
253
|
+
|
254
|
+
inTxtStrict = Open.open(fin)
|
255
|
+
inTxtNew = Open.open(fout)
|
256
|
+
|
257
|
+
marker = "|"[0]
|
258
|
+
position = 0
|
259
|
+
sentenceCount = 1
|
260
|
+
target = ''
|
261
|
+
targetNew = ''
|
262
|
+
start = 0
|
263
|
+
finish = 0
|
264
|
+
|
265
|
+
while(!inTxtNew.eof?) do
|
266
|
+
targetNew = inTxtNew.getc
|
267
|
+
target = inTxtStrict.getc
|
268
|
+
position += 1
|
269
|
+
if targetNew == marker
|
270
|
+
sentenceCount += 1
|
271
|
+
finish = position - 1
|
272
|
+
offsets << [start, finish] if finish - start > 10
|
273
|
+
if targetNew == target
|
274
|
+
start = position
|
275
|
+
else
|
276
|
+
targetNew = inTxtNew.getc
|
277
|
+
while targetNew != target do
|
278
|
+
target = inTxtStrict.getc
|
279
|
+
position += 1
|
280
|
+
end
|
281
|
+
start = position - 1
|
282
|
+
end
|
283
|
+
end
|
284
|
+
end
|
285
|
+
|
286
|
+
finish = position - 1
|
287
|
+
offsets << [start, finish] if finish > start
|
288
|
+
|
289
|
+
inTxtStrict.close
|
290
|
+
inTxtNew.close
|
291
|
+
end
|
292
|
+
end
|
293
|
+
|
294
|
+
offsets.collect do |s,e|
|
295
|
+
sentence = text[s..e]
|
296
|
+
next if sentence.nil?
|
297
|
+
#sentence.gsub!(NEW_LINE_MASK, "\n")
|
298
|
+
Segment.setup sentence, s
|
299
|
+
sentence
|
300
|
+
end
|
301
|
+
end
|
302
|
+
|
238
303
|
end
|
data/lib/rbbt/nlp/nlp.rb
CHANGED
@@ -16,76 +16,10 @@ module NLP
|
|
16
16
|
#Rbbt.software.opt.StanfordParser.define_as_install Rbbt.share.install.software.StanfordParser.find
|
17
17
|
#Rbbt.software.opt.StanfordParser.produce
|
18
18
|
|
19
|
-
Rbbt.claim Rbbt.software.opt.Geniass, :install, Rbbt.share.install.software.Geniass.find
|
20
|
-
Rbbt.software.opt.Geniass.produce
|
21
|
-
|
22
19
|
Rbbt.claim Rbbt.software.opt.Gdep, :install, Rbbt.share.install.software.Gdep.find
|
23
|
-
Rbbt.software.opt.Gdep.produce
|
24
20
|
|
25
21
|
NEW_LINE_MASK = "\t\t \t \t"
|
26
22
|
|
27
|
-
def self.geniass_sentence_splitter(text)
|
28
|
-
offsets = []
|
29
|
-
|
30
|
-
cleaned = text.gsub("\n",NEW_LINE_MASK)
|
31
|
-
TmpFile.with_file(cleaned) do |fin|
|
32
|
-
TmpFile.with_file do |fout|
|
33
|
-
CMD.cmd("cd #{Rbbt.software.opt.Geniass.find}; ./geniass #{ fin } #{ fout }")
|
34
|
-
|
35
|
-
|
36
|
-
Open.write(fin, Open.read(fin).gsub(NEW_LINE_MASK, "\n"))
|
37
|
-
Open.write(fout, Open.read(fout).gsub("\n", '|').gsub(NEW_LINE_MASK, "\n"))
|
38
|
-
# Addapted from sentence2standOff.rb in Geniass package
|
39
|
-
|
40
|
-
inTxtStrict = Open.open(fin)
|
41
|
-
inTxtNew = Open.open(fout)
|
42
|
-
|
43
|
-
marker = "|"[0]
|
44
|
-
position = 0
|
45
|
-
sentenceCount = 1
|
46
|
-
target = ''
|
47
|
-
targetNew = ''
|
48
|
-
start = 0
|
49
|
-
finish = 0
|
50
|
-
|
51
|
-
while(!inTxtNew.eof?) do
|
52
|
-
targetNew = inTxtNew.getc
|
53
|
-
target = inTxtStrict.getc
|
54
|
-
position += 1
|
55
|
-
if targetNew == marker
|
56
|
-
sentenceCount += 1
|
57
|
-
finish = position - 1
|
58
|
-
offsets << [start, finish] if finish - start > 10
|
59
|
-
if targetNew == target
|
60
|
-
start = position
|
61
|
-
else
|
62
|
-
targetNew = inTxtNew.getc
|
63
|
-
while targetNew != target do
|
64
|
-
target = inTxtStrict.getc
|
65
|
-
position += 1
|
66
|
-
end
|
67
|
-
start = position - 1
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
finish = position - 1
|
73
|
-
offsets << [start, finish] if finish > start
|
74
|
-
|
75
|
-
inTxtStrict.close
|
76
|
-
inTxtNew.close
|
77
|
-
end
|
78
|
-
end
|
79
|
-
|
80
|
-
offsets.collect do |s,e|
|
81
|
-
sentence = text[s..e]
|
82
|
-
next if sentence.nil?
|
83
|
-
#sentence.gsub!(NEW_LINE_MASK, "\n")
|
84
|
-
Segment.setup sentence, s
|
85
|
-
sentence
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
23
|
module GdepToken
|
90
24
|
extend Annotation
|
91
25
|
include Segment
|
@@ -219,3 +153,8 @@ module NLP
|
|
219
153
|
end
|
220
154
|
end
|
221
155
|
end
|
156
|
+
|
157
|
+
if __FILE__ == $0
|
158
|
+
Log.severity = 0
|
159
|
+
Rbbt.software.opt.Gdep.produce
|
160
|
+
end
|
@@ -6,16 +6,20 @@ require 'rbbt/resource'
|
|
6
6
|
module OpenNLP
|
7
7
|
Rbbt.claim Rbbt.software.opt.OpenNLP, :install, Rbbt.share.install.software.OpenNLP.find
|
8
8
|
|
9
|
+
|
9
10
|
Rbbt.claim Rbbt.software.opt.OpenNLP.models["da-sent.bin"], :url, "http://opennlp.sourceforge.net/models-1.5/de-sent.bin"
|
10
11
|
|
11
12
|
MAX = 5
|
12
13
|
|
13
|
-
@@FileInputStream = Rjb::import('java.io.FileInputStream')
|
14
|
-
@@SentenceModel = Rjb::import('opennlp.tools.sentdetect.SentenceModel')
|
15
|
-
@@SentenceDetectorME = Rjb::import('opennlp.tools.sentdetect.SentenceDetectorME')
|
16
|
-
|
17
14
|
def self.sentence_split_detector
|
18
15
|
@@sentence_split_detector ||= begin
|
16
|
+
Rbbt.software.opt.OpenNLP.produce
|
17
|
+
Rbbt.software.opt.OpenNLP.models["da-sent.bin"].produce
|
18
|
+
|
19
|
+
@@FileInputStream = Rjb::import('java.io.FileInputStream')
|
20
|
+
@@SentenceModel = Rjb::import('opennlp.tools.sentdetect.SentenceModel')
|
21
|
+
@@SentenceDetectorME = Rjb::import('opennlp.tools.sentdetect.SentenceDetectorME')
|
22
|
+
|
19
23
|
modelIn = @@FileInputStream.new(Rbbt.software.opt.OpenNLP.models["da-sent.bin"].produce.find);
|
20
24
|
|
21
25
|
model = @@SentenceModel.new(modelIn);
|
data/share/install/software/Gdep
CHANGED
@@ -6,7 +6,7 @@ class TestClass < Test::Unit::TestCase
|
|
6
6
|
def test_info
|
7
7
|
a = ["test"]
|
8
8
|
NamedEntity.setup a
|
9
|
-
assert(
|
9
|
+
assert(a.info[:code].nil?)
|
10
10
|
a.code = 10
|
11
11
|
a.offset = 100
|
12
12
|
assert a.info.include? :code
|
@@ -26,4 +26,27 @@ class TestClass < Test::Unit::TestCase
|
|
26
26
|
assert Segment.tsv([a], nil).fields.include? "code"
|
27
27
|
assert Segment.tsv([a], "literal").fields.include? "code"
|
28
28
|
end
|
29
|
+
|
30
|
+
def test_segment_brat
|
31
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
32
|
+
|
33
|
+
gene1 = "TP53"
|
34
|
+
gene1.extend NamedEntity
|
35
|
+
gene1.offset = a.index gene1
|
36
|
+
gene1.type = "Gene"
|
37
|
+
|
38
|
+
gene2 = "CDK5R1"
|
39
|
+
gene2.extend NamedEntity
|
40
|
+
gene2.offset = a.index gene2
|
41
|
+
gene2.type = "Gene"
|
42
|
+
|
43
|
+
gene3 = "TP53 gene"
|
44
|
+
gene3.extend NamedEntity
|
45
|
+
gene3.offset = a.index gene3
|
46
|
+
gene3.type = "Gene"
|
47
|
+
|
48
|
+
segments = [gene1, gene2, gene3]
|
49
|
+
assert segments.collect{|s| s.to_brat}.include? "Gene 27 35"
|
50
|
+
|
51
|
+
end
|
29
52
|
end
|
File without changes
|
@@ -2,10 +2,23 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_he
|
|
2
2
|
require 'rbbt/ner/segment/transformed'
|
3
3
|
require 'rbbt/ner/segment/named_entity'
|
4
4
|
require 'rexml/document'
|
5
|
-
require 'rand'
|
6
5
|
|
7
6
|
class TestClass < Test::Unit::TestCase
|
8
|
-
def
|
7
|
+
def test_sort
|
8
|
+
text = <<-EOF
|
9
|
+
More recently, PPAR activators were shown to inhibit the activation of inflammatory response genes (such as IL-2, IL-6, IL-8, TNF alpha and metalloproteases) by negatively interfering with the NF-kappa B, STAT and AP-1 signalling pathways in cells of the vascular wall.
|
10
|
+
EOF
|
11
|
+
|
12
|
+
entities = ["PPAR", "IL-2", "IL-6", "IL-8", "TNF alpha", "NF-kappa B", "AP-1", "STAT"].reverse.collect do |literal|
|
13
|
+
NamedEntity.setup(literal, :offset => text.index(literal))
|
14
|
+
end
|
15
|
+
|
16
|
+
Transformed.with_transform(text, entities, Proc.new{|e| "[" + e.upcase + "]" }) do
|
17
|
+
assert text.include? "such as [IL-2]"
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def ___test_transform
|
9
22
|
a = "This sentence mentions the TP53 gene and the CDK5 protein"
|
10
23
|
original = a.dup
|
11
24
|
|
@@ -27,6 +40,8 @@ class TestClass < Test::Unit::TestCase
|
|
27
40
|
c[gene1.range] = "GN"
|
28
41
|
assert_equal c, Transformed.transform(a,[gene1], "GN")
|
29
42
|
|
43
|
+
iii a.transformation_offset_differences
|
44
|
+
raise
|
30
45
|
assert_equal gene2.offset, a.transformation_offset_differences.first.first.first
|
31
46
|
assert_equal gene1.offset, a.transformation_offset_differences.last.first.first
|
32
47
|
|
@@ -216,5 +231,60 @@ class TestClass < Test::Unit::TestCase
|
|
216
231
|
end
|
217
232
|
end
|
218
233
|
end
|
234
|
+
|
235
|
+
def test_nested_transform
|
236
|
+
a = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
237
|
+
|
238
|
+
gene1 = "TP53"
|
239
|
+
gene1.extend NamedEntity
|
240
|
+
gene1.offset = a.index gene1
|
241
|
+
gene1.type = "Gene"
|
242
|
+
|
243
|
+
gene2 = "CDK5R1"
|
244
|
+
gene2.extend NamedEntity
|
245
|
+
gene2.offset = a.index gene2
|
246
|
+
gene2.type = "Protein"
|
247
|
+
|
248
|
+
Transformed.with_transform(a, [gene1,gene2], "[G]") do
|
249
|
+
assert_equal "This sentence mentions the [G] gene and the [G] protein", a
|
250
|
+
end
|
251
|
+
Transformed.with_transform(a, [gene1], "[G1]") do
|
252
|
+
Transformed.with_transform(a, [gene2], "[G2]") do
|
253
|
+
assert_equal "This sentence mentions the [G1] gene and the [G2] protein", a
|
254
|
+
end
|
255
|
+
end
|
256
|
+
Transformed.with_transform(a, [gene2], "[G2]") do
|
257
|
+
Transformed.with_transform(a, [gene1], "[G1]") do
|
258
|
+
assert_equal "This sentence mentions the [G1] gene and the [G2] protein", a
|
259
|
+
end
|
260
|
+
end
|
261
|
+
end
|
262
|
+
|
263
|
+
def test_offset_transform
|
264
|
+
a = "ILF can bind to purine-rich regulatory motifs such as the human T-cell leukemia virus-long terminal region and the interleukin-2 promoter."
|
265
|
+
|
266
|
+
gene1 = "ILF"
|
267
|
+
gene1.extend NamedEntity
|
268
|
+
gene1.offset = a.index gene1
|
269
|
+
gene1.type = "Gene"
|
270
|
+
|
271
|
+
gene2 = "interleukin-2"
|
272
|
+
gene2.extend NamedEntity
|
273
|
+
gene2.offset = a.index gene2
|
274
|
+
gene2.type = "Protein"
|
275
|
+
|
276
|
+
Transformed.with_transform(a, [gene1,gene2], "[G]") do
|
277
|
+
assert_equal "[G] can bind to purine-rich regulatory motifs such as the human T-cell leukemia virus-long terminal region and the [G] promoter.", a
|
278
|
+
end
|
279
|
+
|
280
|
+
offset = 100
|
281
|
+
a = Segment.setup(a, :offset => offset)
|
282
|
+
gene1.offset += offset
|
283
|
+
gene2.offset += offset
|
284
|
+
Transformed.with_transform(a, [gene1,gene2], "[G]") do
|
285
|
+
assert_equal "[G] can bind to purine-rich regulatory motifs such as the human T-cell leukemia virus-long terminal region and the [G] promoter.", a
|
286
|
+
end
|
287
|
+
|
288
|
+
end
|
219
289
|
end
|
220
290
|
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/brat'
|
3
|
+
|
4
|
+
class TestBrat < Test::Unit::TestCase
|
5
|
+
def test_load
|
6
|
+
text =<<-EOF
|
7
|
+
T2 DBTF 52 55 Nrl
|
8
|
+
#2 AnnotatorNotes T2 4901
|
9
|
+
T3 NONDBTF 80 89 rhodopsin
|
10
|
+
#3 AnnotatorNotes T3 6010
|
11
|
+
T4 BIOLOGICALPROCESS 90 105 gene expression
|
12
|
+
#4 AnnotatorNotes T4 -
|
13
|
+
T5 DBTF 127 130 Nrl
|
14
|
+
#5 AnnotatorNotes T5 4901
|
15
|
+
T7 MOLECULARFUNCTION 197 204 binding
|
16
|
+
#7 AnnotatorNotes T7 -
|
17
|
+
T8 PHENOTYPE 241 252 extended AP
|
18
|
+
#8 AnnotatorNotes T8 -
|
19
|
+
T10 DBTF 331 334 Nrl
|
20
|
+
#10 AnnotatorNotes T10 4901
|
21
|
+
T11 TISSUE 381 399 photoreceptor cell
|
22
|
+
#11 AnnotatorNotes T11 -
|
23
|
+
T12 NONDBTF 414 423 rhodopsin
|
24
|
+
#12 AnnotatorNotes T12 6010
|
25
|
+
T13 CELLULARCOMPONENT 494 501 nuclear
|
26
|
+
#13 AnnotatorNotes T13 -
|
27
|
+
T14 TISSUE 548 572 retinoblastoma cell line
|
28
|
+
#14 AnnotatorNotes T14 -
|
29
|
+
T17 NONDBTF 660 669 rhodopsin
|
30
|
+
#17 AnnotatorNotes T17 6010
|
31
|
+
T18 DBTF 676 679 Nrl
|
32
|
+
#18 AnnotatorNotes T18 4901
|
33
|
+
T19 CELLULARCOMPONENT 749 764 protein complex
|
34
|
+
#19 AnnotatorNotes T19 -
|
35
|
+
T20 DBTF 797 800 Nrl
|
36
|
+
#20 AnnotatorNotes T20 4901
|
37
|
+
T21 DBTF 853 856 Nrl
|
38
|
+
#21 AnnotatorNotes T21 4901
|
39
|
+
T22 MOLECULARFUNCTION 882 892 luciferase
|
40
|
+
#22 AnnotatorNotes T22 -
|
41
|
+
T23 DBTF 943 946 Nrl
|
42
|
+
#23 AnnotatorNotes T23 4901
|
43
|
+
T24 NONDBTF 989 998 rhodopsin
|
44
|
+
#24 AnnotatorNotes T24 6010
|
45
|
+
T26 DBTF 1110 1113 Nrl
|
46
|
+
#26 AnnotatorNotes T26 4901
|
47
|
+
T27 DBTF 1224 1227 Nrl
|
48
|
+
#27 AnnotatorNotes T27 4901
|
49
|
+
T28 DBTF 1271 1274 Nrl
|
50
|
+
#28 AnnotatorNotes T28 4901
|
51
|
+
T30 DBTF 1385 1388 Nrl
|
52
|
+
#30 AnnotatorNotes T30 4901
|
53
|
+
R1 ACTIVATION Arg1:T2 Arg2:T3
|
54
|
+
R2 ACTIVATION Arg1:T10 Arg2:T12
|
55
|
+
R3 ACTIVATION Arg1:T23 Arg2:T24
|
56
|
+
T1 DBTF 250 254 AP-1
|
57
|
+
EOF
|
58
|
+
|
59
|
+
io = StringIO.new text
|
60
|
+
iii Brat.load io
|
61
|
+
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/ner/g_norm_plus'
|
3
|
+
|
4
|
+
Log.severity = 0
|
5
|
+
class TestGNormPlus < Test::Unit::TestCase
|
6
|
+
def test_match
|
7
|
+
text =<<-EOF
|
8
|
+
We found that TP53 is regulated by MDM2 in Homo sapiens
|
9
|
+
EOF
|
10
|
+
|
11
|
+
|
12
|
+
mentions = GNormPlus.process({:file => text})
|
13
|
+
Log.tsv mentions
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
@@ -29,11 +29,14 @@ sentence. This is
|
|
29
29
|
another sentence.
|
30
30
|
EOF
|
31
31
|
|
32
|
+
iii OpenNLP.sentence_split_detector.sentDetect(text)
|
33
|
+
assert_equal 5, OpenNLP.sentence_split_detector.sentDetect(text).length
|
34
|
+
|
32
35
|
assert_equal 5, OpenNLP.sentence_splitter(text).length
|
33
36
|
assert_equal "This is a \nsentence.", OpenNLP.sentence_splitter(text)[3]
|
34
37
|
end
|
35
38
|
|
36
|
-
def
|
39
|
+
def _test_text_sentences
|
37
40
|
Misc.benchmark(100) do
|
38
41
|
OpenNLP.sentence_splitter($text).include? "Our
|
39
42
|
findings highlight the role of SMARCA4 in the pathogenesis of SMARCB1-positive
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-01-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -66,20 +66,6 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: rjb
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '0'
|
76
|
-
type: :runtime
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '0'
|
83
69
|
description: 'Text mining tools: named entity recognition and normalization, document
|
84
70
|
classification, bag-of-words, dictionaries, etc'
|
85
71
|
email: miguel.vazquez@fdi.ucm.es
|
@@ -100,8 +86,10 @@ files:
|
|
100
86
|
- lib/rbbt/ner/NER.rb
|
101
87
|
- lib/rbbt/ner/abner.rb
|
102
88
|
- lib/rbbt/ner/banner.rb
|
89
|
+
- lib/rbbt/ner/brat.rb
|
103
90
|
- lib/rbbt/ner/chemical_tagger.rb
|
104
91
|
- lib/rbbt/ner/finder.rb
|
92
|
+
- lib/rbbt/ner/g_norm_plus.rb
|
105
93
|
- lib/rbbt/ner/linnaeus.rb
|
106
94
|
- lib/rbbt/ner/ngram_prefix_dictionary.rb
|
107
95
|
- lib/rbbt/ner/oscar3.rb
|
@@ -125,6 +113,7 @@ files:
|
|
125
113
|
- share/install/software/ABNER
|
126
114
|
- share/install/software/BANNER
|
127
115
|
- share/install/software/ChemicalTagger
|
116
|
+
- share/install/software/GNormPlus
|
128
117
|
- share/install/software/Gdep
|
129
118
|
- share/install/software/Geniass
|
130
119
|
- share/install/software/Linnaeus
|
@@ -141,13 +130,16 @@ files:
|
|
141
130
|
- test/rbbt/bow/test_misc.rb
|
142
131
|
- test/rbbt/entity/test_document.rb
|
143
132
|
- test/rbbt/ner/segment/test_named_entity.rb
|
133
|
+
- test/rbbt/ner/segment/test_relationship.rb
|
144
134
|
- test/rbbt/ner/segment/test_segmented.rb
|
145
135
|
- test/rbbt/ner/segment/test_transformed.rb
|
146
136
|
- test/rbbt/ner/test_NER.rb
|
147
137
|
- test/rbbt/ner/test_abner.rb
|
148
138
|
- test/rbbt/ner/test_banner.rb
|
139
|
+
- test/rbbt/ner/test_brat.rb
|
149
140
|
- test/rbbt/ner/test_chemical_tagger.rb
|
150
141
|
- test/rbbt/ner/test_finder.rb
|
142
|
+
- test/rbbt/ner/test_g_norm_plus.rb
|
151
143
|
- test/rbbt/ner/test_linnaeus.rb
|
152
144
|
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
153
145
|
- test/rbbt/ner/test_oscar4.rb
|
@@ -156,6 +148,7 @@ files:
|
|
156
148
|
- test/rbbt/ner/test_rnorm.rb
|
157
149
|
- test/rbbt/ner/test_segment.rb
|
158
150
|
- test/rbbt/ner/test_token_trieNER.rb
|
151
|
+
- test/rbbt/nlp/genia/test_sentence_splitter.rb
|
159
152
|
- test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
|
160
153
|
- test/rbbt/nlp/test_nlp.rb
|
161
154
|
- test/test_helper.rb
|
@@ -177,14 +170,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
177
170
|
- !ruby/object:Gem::Version
|
178
171
|
version: '0'
|
179
172
|
requirements: []
|
180
|
-
|
181
|
-
rubygems_version: 2.6.13
|
173
|
+
rubygems_version: 3.0.6
|
182
174
|
signing_key:
|
183
175
|
specification_version: 4
|
184
176
|
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|
185
177
|
test_files:
|
186
178
|
- test/rbbt/nlp/test_nlp.rb
|
187
179
|
- test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
|
180
|
+
- test/rbbt/nlp/genia/test_sentence_splitter.rb
|
188
181
|
- test/rbbt/bow/test_bow.rb
|
189
182
|
- test/rbbt/bow/test_misc.rb
|
190
183
|
- test/rbbt/bow/test_dictionary.rb
|
@@ -195,6 +188,8 @@ test_files:
|
|
195
188
|
- test/rbbt/ner/test_rnorm.rb
|
196
189
|
- test/rbbt/ner/test_regexpNER.rb
|
197
190
|
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
191
|
+
- test/rbbt/ner/test_brat.rb
|
192
|
+
- test/rbbt/ner/test_g_norm_plus.rb
|
198
193
|
- test/rbbt/ner/test_chemical_tagger.rb
|
199
194
|
- test/rbbt/ner/test_banner.rb
|
200
195
|
- test/rbbt/ner/test_token_trieNER.rb
|
@@ -202,6 +197,7 @@ test_files:
|
|
202
197
|
- test/rbbt/ner/test_segment.rb
|
203
198
|
- test/rbbt/ner/test_linnaeus.rb
|
204
199
|
- test/rbbt/ner/segment/test_transformed.rb
|
200
|
+
- test/rbbt/ner/segment/test_relationship.rb
|
205
201
|
- test/rbbt/ner/segment/test_named_entity.rb
|
206
202
|
- test/rbbt/ner/segment/test_segmented.rb
|
207
203
|
- test/rbbt/ner/test_oscar4.rb
|