rbbt-text 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/lib/rbbt/entity/document.rb +17 -26
- data/lib/rbbt/ner/abner.rb +1 -0
- data/lib/rbbt/ner/banner.rb +1 -0
- data/lib/rbbt/ner/chemical_tagger.rb +2 -0
- data/lib/rbbt/ner/finder.rb +6 -7
- data/lib/rbbt/ner/ngram_prefix_dictionary.rb +5 -7
- data/lib/rbbt/ner/oscar4.rb +8 -0
- data/lib/rbbt/ner/segment/docid.rb +1 -1
- data/lib/rbbt/ner/segment/named_entity.rb +14 -0
- data/lib/rbbt/ner/segment/transformed.rb +10 -10
- data/lib/rbbt/ner/segment.rb +13 -5
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +2 -0
- data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +2 -0
- data/share/install/software/Geniass +1 -1
- data/share/install/software/OpenNLP +1 -1
- data/test/rbbt/entity/test_document.rb +9 -6
- data/test/rbbt/ner/test_finder.rb +5 -6
- data/test/rbbt/ner/test_ngram_prefix_dictionary.rb +0 -1
- data/test/rbbt/ner/test_oscar4.rb +1 -1
- metadata +31 -43
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
OGU0NTNiMzRjZjZiOTkwMjg1ZmJlNTU0NGY1MTM4YzhkZmYwOTZmZg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NTI2YmFhMzI3OWU1NjQ3ZjZhMDUzMzc0N2VkOTAwMjMyNzAxOWVmNQ==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
N2U5NGZmOWVjODdlYjY0N2NjMmVkNmVmODJhNTVjNzI1NTA3ZmYyY2E3ZWYx
|
10
|
+
MGU0MDlmOGNkODRiYjYyYmVlYzFjOTkwMGYyMTQ2NmIxMGExZmYxMWQxMjQy
|
11
|
+
OWM2MDZlZDQxZTRmOTJlMzMzMTk4MDlhM2YwOGNmYWQxNDAwY2E=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZjQyYTg2YTIzMmQyMjE5NjhlYjExZTllZGJlNjQ3ZGU3NTVlNTEyMWE0NjRl
|
14
|
+
YWQzMTUxNDVjNzQ0NWQ0YWQ5MWY2ZGI4MzJlNjI2ZWQ5OWI1OTI0MjllYzBj
|
15
|
+
NDIwYmM3OTBhNDBhMWIwYzk1Mjc3MTU0Mzc1MWE3Yzk3MDgyN2Q=
|
data/lib/rbbt/entity/document.rb
CHANGED
@@ -8,8 +8,6 @@ module Document
|
|
8
8
|
attr_accessor :corpus
|
9
9
|
end
|
10
10
|
|
11
|
-
attr_accessor :docid
|
12
|
-
|
13
11
|
property :docid => :single2array do |*args|
|
14
12
|
@docid ||= if self =~ /^text:/
|
15
13
|
self
|
@@ -19,10 +17,6 @@ module Document
|
|
19
17
|
@docid
|
20
18
|
end
|
21
19
|
|
22
|
-
#property :annotation_id => :single2array do |*args|
|
23
|
-
# docid(*args)
|
24
|
-
#end
|
25
|
-
|
26
20
|
property :annotation_id => :both do |*args|
|
27
21
|
if Array === self
|
28
22
|
Misc.hash2md5(info.merge(:self => self))
|
@@ -43,42 +37,39 @@ module Document
|
|
43
37
|
self._get_text(*args)
|
44
38
|
else
|
45
39
|
|
46
|
-
Document.corpus.
|
47
|
-
|
40
|
+
Document.corpus.read_and_close do
|
41
|
+
self.each do |doc|
|
42
|
+
id = doc.docid(*args)
|
43
|
+
case
|
44
|
+
when Document.corpus.include?(doc)
|
45
|
+
article_text[doc] = Document.corpus[doc]
|
46
|
+
when Document.corpus.include?(id)
|
47
|
+
article_text[doc] = Document.corpus[id]
|
48
|
+
else
|
49
|
+
missing << doc
|
50
|
+
end
|
48
51
|
|
49
|
-
case
|
50
|
-
when Document.corpus.include?(doc)
|
51
|
-
article_text[doc] = Document.corpus[doc]
|
52
|
-
when Document.corpus.include?(doc.docid(*args))
|
53
|
-
article_text[doc] = Document.corpus[doc.docid(*args)]
|
54
|
-
else
|
55
|
-
missing << doc
|
56
52
|
end
|
57
|
-
|
58
53
|
end
|
59
|
-
Document.corpus.close if Document.corpus.respond_to? :close
|
60
54
|
|
61
55
|
if missing.any?
|
62
56
|
missing.first.annotate missing
|
63
57
|
missing_text = Misc.process_to_hash(missing){|list| list._get_text(*args)}
|
64
58
|
|
65
59
|
Misc.lock(Document.corpus.respond_to?(:persistence_path) ? Document.corpus.persistence_path : nil) do
|
66
|
-
Document.corpus.
|
60
|
+
Document.corpus.write_and_close do
|
67
61
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
62
|
+
missing_text.each do |doc, doc_text|
|
63
|
+
doc = self.annotate doc.dup
|
64
|
+
Document.corpus[doc.docid(*args)] = doc_text
|
65
|
+
article_text[doc] = doc_text
|
66
|
+
end
|
72
67
|
end
|
73
|
-
|
74
|
-
Document.corpus.close if Document.corpus.respond_to? :close
|
75
68
|
end
|
76
|
-
|
77
69
|
end
|
78
70
|
|
79
71
|
article_text.values_at *self
|
80
72
|
end
|
81
73
|
end
|
82
|
-
|
83
74
|
end
|
84
75
|
|
data/lib/rbbt/ner/abner.rb
CHANGED
data/lib/rbbt/ner/banner.rb
CHANGED
@@ -57,6 +57,7 @@ class Banner < NER
|
|
57
57
|
text.gsub!(/\n/,' ')
|
58
58
|
text.gsub!(/\|/,'/') # Character | gives an error
|
59
59
|
return [] if text.strip.empty?
|
60
|
+
text = text.encode('utf-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '')
|
60
61
|
sentence = @@Sentence.new(text)
|
61
62
|
|
62
63
|
@tokenizer.tokenize(sentence)
|
@@ -8,6 +8,7 @@ class ChemicalTagger < NER
|
|
8
8
|
Rbbt.claim Rbbt.software.opt.ChemicalTagger, :install, Rbbt.share.install.software.ChemicalTagger.find
|
9
9
|
|
10
10
|
def self.init
|
11
|
+
ENV["CLASSPATH"] = ENV["CLASSPATH"].split(":").reverse * ":"
|
11
12
|
Rjb::load(nil, jvmargs = ['-Xms1G','-Xmx2G']) unless Rjb.loaded?
|
12
13
|
@@RbbtChemicalTagger ||= Rjb::import('RbbtChemicalTagger')
|
13
14
|
end
|
@@ -21,6 +22,7 @@ class ChemicalTagger < NER
|
|
21
22
|
matches = @@RbbtChemicalTagger.match(text)
|
22
23
|
rescue
|
23
24
|
Log.debug "ChemicalTagger Error: #{$!.message}"
|
25
|
+
ddd $!.backtrace
|
24
26
|
return []
|
25
27
|
end
|
26
28
|
|
data/lib/rbbt/ner/finder.rb
CHANGED
@@ -15,16 +15,17 @@ class Finder
|
|
15
15
|
class Instance
|
16
16
|
attr_accessor :namespace, :format, :normalizer
|
17
17
|
def initialize(path, open_options = {})
|
18
|
-
|
18
|
+
case path
|
19
|
+
when TSV
|
19
20
|
@namespace = path.namespace
|
20
21
|
@format = path.key_field
|
21
22
|
@normalizer = Normalizer.new(path)
|
22
23
|
else
|
23
24
|
open_options = Misc.add_defaults open_options, :type => :flat
|
24
|
-
|
25
|
-
@namespace =
|
26
|
-
@format =
|
27
|
-
@normalizer = Normalizer.new(
|
25
|
+
tsv = TSV.open(path, open_options)
|
26
|
+
@namespace = tsv.namespace
|
27
|
+
@format = tsv.key_field
|
28
|
+
@normalizer = Normalizer.new(tsv)
|
28
29
|
end
|
29
30
|
end
|
30
31
|
|
@@ -55,6 +56,4 @@ class Finder
|
|
55
56
|
acc += instance.find(name)
|
56
57
|
end
|
57
58
|
end
|
58
|
-
|
59
59
|
end
|
60
|
-
|
@@ -31,7 +31,7 @@ VALUE fast_start_with(VALUE str, VALUE cmp, int offset)
|
|
31
31
|
int length_cmp = RSTRING_LEN(cmp);
|
32
32
|
int length_str = RSTRING_LEN(str);
|
33
33
|
|
34
|
-
if (memcmp(RSTRING_PTR(str)+ offset, RSTRING_PTR(cmp), length_cmp) == 0){
|
34
|
+
if (memcmp(RSTRING_PTR(str) + offset, RSTRING_PTR(cmp), length_cmp) == 0){
|
35
35
|
if (length_cmp - offset == length_str || is_stop_letter(RSTRING_PTR(str)[offset + length_cmp]))
|
36
36
|
return Qtrue;
|
37
37
|
else
|
@@ -87,25 +87,23 @@ VALUE fast_start_with(VALUE str, VALUE cmp, int offset)
|
|
87
87
|
matches = []
|
88
88
|
|
89
89
|
text_offset = 0
|
90
|
+
text_chars = text.chars.to_a
|
90
91
|
text_length = text.length
|
91
92
|
while (not text_offset.nil?) and text_offset < text_length
|
92
93
|
if STOP_LETTER_CHAR_VALUES.include? text[text_offset]
|
93
94
|
text_offset += 1
|
94
95
|
next
|
95
96
|
end
|
96
|
-
ngram = text
|
97
|
+
ngram = text.slice(text_offset, 3).strip
|
98
|
+
text_byte_offset = text_offset == 0 ? 0 : text[0..text_offset-1].bytesize
|
97
99
|
|
98
100
|
found = nil
|
99
101
|
if index.include? ngram
|
100
|
-
|
101
102
|
diff = text_length - text_offset
|
102
103
|
# Match with entries
|
103
104
|
index[ngram].each do |name, code|
|
104
105
|
if name.length <= diff
|
105
|
-
|
106
|
-
# (text_offset + name.length == text_length or piece[name.length] == " "[0])
|
107
|
-
|
108
|
-
if fast_start_with(text, name, text_offset)
|
106
|
+
if fast_start_with(text, name, text_byte_offset)
|
109
107
|
found = [name.dup, code, text_offset]
|
110
108
|
break
|
111
109
|
end
|
data/lib/rbbt/ner/oscar4.rb
CHANGED
@@ -9,6 +9,12 @@ class OSCAR4 < NER
|
|
9
9
|
Rbbt.claim Rbbt.software.opt.OSCAR4, :install, Rbbt.share.install.software.OSCAR4.find
|
10
10
|
|
11
11
|
def self.init
|
12
|
+
|
13
|
+
# There is an incompatibility between the OpenNLP version in OSCAR4 and the
|
14
|
+
# one used for other matters in Rbbt, which is the most recent. We remove
|
15
|
+
# the standalone jars from the CLASSPATH
|
16
|
+
ENV["CLASSPATH"] = ENV["CLASSPATH"].split(":").select{|p| p !~ /opennlp/} * ":"
|
17
|
+
|
12
18
|
Rjb::load(nil, jvmargs = ['-Xms1G','-Xmx2G']) unless Rjb.loaded?
|
13
19
|
|
14
20
|
@@OSCAR ||= Rjb::import('uk.ac.cam.ch.wwmm.oscar.Oscar')
|
@@ -38,6 +44,8 @@ class OSCAR4 < NER
|
|
38
44
|
#inchi = inchi.getValue() unless inchi.nil?
|
39
45
|
inchi = nil
|
40
46
|
|
47
|
+
next unless entity.getType.toString == type unless type.nil?
|
48
|
+
|
41
49
|
NamedEntity.setup mention, entity.getStart, entity.getType, inchi, entity.getConfidence
|
42
50
|
|
43
51
|
result << mention
|
@@ -22,7 +22,7 @@ module SegmentWithDocid
|
|
22
22
|
def unmasked_text
|
23
23
|
return self unless masked?
|
24
24
|
tag, length = self.split(":")
|
25
|
-
Document.setup(docid).text[offset..(offset+length.to_i-1)]
|
25
|
+
Document.setup(docid).text[offset.to_i..(offset.to_i+length.to_i-1)]
|
26
26
|
end
|
27
27
|
|
28
28
|
def unmask
|
@@ -27,5 +27,19 @@ Score: #{score.inspect}
|
|
27
27
|
EOF
|
28
28
|
text.chomp
|
29
29
|
end
|
30
|
+
|
31
|
+
def entity(params = nil)
|
32
|
+
format, entity = code.split(":")
|
33
|
+
entity, format = format, nil if entity.nil?
|
34
|
+
|
35
|
+
if defined? Entity and Entity.formats.include? type or Entity.formats.include? format
|
36
|
+
params ||= {}
|
37
|
+
params[:format] = format if format and params[:format].nil?
|
38
|
+
mod = (Entity.formats[type] || Entity.format[entity])
|
39
|
+
mod.setup(entity, params)
|
40
|
+
end
|
41
|
+
|
42
|
+
entity
|
43
|
+
end
|
30
44
|
end
|
31
45
|
|
@@ -69,16 +69,16 @@ module Transformed
|
|
69
69
|
when (b.nil? or b.offset.nil?)
|
70
70
|
+1
|
71
71
|
# Non-overlap
|
72
|
-
when (a.end < b.offset or b.end < a.offset)
|
72
|
+
when (a.end < b.offset.to_i or b.end < a.offset.to_i)
|
73
73
|
b.offset <=> a.offset
|
74
74
|
# b includes a
|
75
|
-
when (a.offset >= b.offset and a.end <= b.end)
|
75
|
+
when (a.offset.to_i >= b.offset.to_i and a.end <= b.end)
|
76
76
|
-1
|
77
77
|
# b includes a
|
78
|
-
when (b.offset >= a.offset and b.end <= a.end)
|
78
|
+
when (b.offset.to_i >= a.offset.to_i and b.end <= a.end)
|
79
79
|
+1
|
80
80
|
# Overlap
|
81
|
-
when (a.offset > b.offset and a.end > b.end or b.offset < a.offset and b.end > a.end)
|
81
|
+
when (a.offset.to_i > b.offset.to_i and a.end > b.end or b.offset.to_i < a.offset.to_i and b.end > a.end)
|
82
82
|
a.length <=> b.length
|
83
83
|
else
|
84
84
|
raise "Unexpected case in sort: #{a.range} - #{b.range}"
|
@@ -99,8 +99,8 @@ module Transformed
|
|
99
99
|
|
100
100
|
shift_begin, shift_end = shift
|
101
101
|
|
102
|
-
text_offset = self.respond_to?(:offset)? self.offset : 0
|
103
|
-
updated_begin = segment.offset + shift_begin - text_offset
|
102
|
+
text_offset = self.respond_to?(:offset)? self.offset.to_i : 0
|
103
|
+
updated_begin = segment.offset.to_i + shift_begin - text_offset
|
104
104
|
updated_end = segment.range.last + shift_end - text_offset
|
105
105
|
|
106
106
|
updated_range = (updated_begin..updated_end)
|
@@ -136,11 +136,11 @@ module Transformed
|
|
136
136
|
# Before
|
137
137
|
when segment.end < range.begin
|
138
138
|
# After
|
139
|
-
when segment.offset > range.end + diff
|
140
|
-
segment.offset -= diff
|
139
|
+
when segment.offset.to_i > range.end + diff
|
140
|
+
segment.offset.to_i -= diff
|
141
141
|
# Includes
|
142
|
-
when (segment.offset <= range.begin and segment.end >= range.end + diff)
|
143
|
-
segment.replace self[segment.offset..segment.end - diff]
|
142
|
+
when (segment.offset.to_i <= range.begin and segment.end >= range.end + diff)
|
143
|
+
segment.replace self[segment.offset.to_i..segment.end - diff]
|
144
144
|
else
|
145
145
|
raise "Segment Overlaps"
|
146
146
|
end
|
data/lib/rbbt/ner/segment.rb
CHANGED
@@ -9,21 +9,24 @@ module Segment
|
|
9
9
|
@offset = offset.nil? ? nil : offset.to_i
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
12
|
def segment_length
|
14
|
-
|
13
|
+
begin
|
14
|
+
super()
|
15
|
+
rescue
|
16
|
+
self.length
|
17
|
+
end
|
15
18
|
end
|
16
19
|
|
17
20
|
#{{{ Ranges
|
18
21
|
|
19
22
|
def end
|
20
23
|
return nil if offset.nil?
|
21
|
-
offset + segment_length - 1
|
24
|
+
offset.to_i + segment_length - 1
|
22
25
|
end
|
23
26
|
|
24
27
|
def range
|
25
28
|
raise "No offset specified" if offset.nil?
|
26
|
-
(offset..self.end)
|
29
|
+
(offset.to_i..self.end)
|
27
30
|
end
|
28
31
|
|
29
32
|
def pull(offset)
|
@@ -68,6 +71,11 @@ module Segment
|
|
68
71
|
end
|
69
72
|
end
|
70
73
|
|
74
|
+
def includes?(segment)
|
75
|
+
(segment.offset.to_i >= self.offset.to_i) and
|
76
|
+
(segment.offset.to_i + segment.segment_length.to_i <= self.offset.to_i + self.segment_length.to_i)
|
77
|
+
end
|
78
|
+
|
71
79
|
#{{{ Sorting
|
72
80
|
|
73
81
|
def self.sort(segments, inline = true)
|
@@ -92,9 +100,9 @@ module Segment
|
|
92
100
|
end
|
93
101
|
|
94
102
|
def self.overlaps(sorted_segments)
|
95
|
-
|
96
103
|
last = nil
|
97
104
|
overlaped = []
|
105
|
+
|
98
106
|
sorted_segments.reverse.each do |segment|
|
99
107
|
overlaped << segment if (not last.nil?) and segment.range.end > last
|
100
108
|
last = segment.range.begin
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'rbbt/nlp/nlp'
|
1
2
|
require 'rbbt/ner/segment'
|
2
3
|
module NLP
|
3
4
|
def self.returnFeatures(prevWord, delimiter, nextWord)
|
@@ -168,6 +169,7 @@ module NLP
|
|
168
169
|
end
|
169
170
|
|
170
171
|
def self.geniass_sentence_splitter_extension(text)
|
172
|
+
Rbbt.software.opt.Geniass.produce
|
171
173
|
require Rbbt.software.opt.Geniass.ruby["Geniass.so"].find
|
172
174
|
geniass = Geniass.new
|
173
175
|
if not geniass.geniass_is_loaded
|
@@ -29,6 +29,7 @@ module OpenNLP
|
|
29
29
|
def self.sentence_splitter(text)
|
30
30
|
return [] if text.nil? or text.empty?
|
31
31
|
|
32
|
+
text = Misc.to_utf8(text)
|
32
33
|
last = 0
|
33
34
|
begin
|
34
35
|
sentence_split_detector = self.sentence_split_detector
|
@@ -61,6 +62,7 @@ module OpenNLP
|
|
61
62
|
end
|
62
63
|
|
63
64
|
sentences.collect{|sentence|
|
65
|
+
sentence = Misc.to_utf8(sentence)
|
64
66
|
start = text.index(sentence, last)
|
65
67
|
Segment.setup sentence, start
|
66
68
|
last = start + sentence.length - 1
|
@@ -1,12 +1,15 @@
|
|
1
1
|
require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
|
2
|
+
|
3
|
+
require 'rbbt/workflow'
|
2
4
|
require 'rbbt/entity'
|
5
|
+
Workflow.require_workflow "Genomics"
|
6
|
+
|
7
|
+
Workflow.require_workflow "TextMining"
|
8
|
+
|
3
9
|
require 'rbbt/entity/pmid'
|
4
10
|
require 'rbbt/entity/document'
|
5
11
|
require 'test/unit'
|
6
12
|
|
7
|
-
require 'rbbt/workflow'
|
8
|
-
|
9
|
-
Workflow.require_workflow "TextMining"
|
10
13
|
|
11
14
|
module Document
|
12
15
|
self.corpus = Persist.open_tokyocabinet("/tmp/corpus", false, :string, "BDB")
|
@@ -18,14 +21,14 @@ module Document
|
|
18
21
|
|
19
22
|
property :abner => :single do |*args|
|
20
23
|
normalize, organism = args
|
21
|
-
TextMining.job(:gene_mention_recognition, "Factoid", :text => text, :method => :
|
24
|
+
TextMining.job(:gene_mention_recognition, "Factoid", :text => text, :method => :abner, :normalize => normalize, :organism => organism).exec.each{|e| SegmentWithDocid.setup(e, self.docid)}
|
22
25
|
end
|
23
26
|
|
24
27
|
persist :abner, :annotations, :dir => Rbbt.tmp.test.find(:user).entity_property
|
25
28
|
end
|
26
29
|
|
27
30
|
class TestDocument < Test::Unit::TestCase
|
28
|
-
def
|
31
|
+
def _test_pmid
|
29
32
|
pmid = "21904853"
|
30
33
|
PMID.setup(pmid)
|
31
34
|
|
@@ -33,7 +36,7 @@ class TestDocument < Test::Unit::TestCase
|
|
33
36
|
assert_match /TET2/, pmid.text
|
34
37
|
end
|
35
38
|
|
36
|
-
def
|
39
|
+
def _test_abner
|
37
40
|
pmid = "21904853"
|
38
41
|
PMID.setup(pmid)
|
39
42
|
|
@@ -8,14 +8,13 @@ require 'rbbt/sources/NCI'
|
|
8
8
|
|
9
9
|
class TestFinder < Test::Unit::TestCase
|
10
10
|
|
11
|
-
def
|
12
|
-
|
13
|
-
|
14
|
-
assert_equal "Hsa/jun2011", f.instances.first.namespace
|
11
|
+
def test_namespace_and_format
|
12
|
+
f = Finder.new(CMD.cmd("head -n 1000", :in => Open.open(Organism.identifiers("Hsa/jun2011").produce.find)))
|
13
|
+
assert_equal "Hsa", f.instances.first.namespace
|
15
14
|
assert_equal "Ensembl Gene ID", f.instances.first.format
|
16
15
|
end
|
17
16
|
|
18
|
-
def
|
17
|
+
def test_find
|
19
18
|
f = Finder.new(Organism.lexicon("Hsa/jun2011"), :grep => ["SF3B1"])
|
20
19
|
|
21
20
|
assert_equal "ENSG00000115524", f.find("SF3B1").first
|
@@ -28,7 +27,7 @@ class TestFinder < Test::Unit::TestCase
|
|
28
27
|
def test_find
|
29
28
|
f = Finder.new(Organism.lexicon("Hsa/jun2011"), :grep => ["RASGRF2"])
|
30
29
|
|
31
|
-
|
30
|
+
assert f.find("RAS").include? "ENSG00000113319"
|
32
31
|
end
|
33
32
|
|
34
33
|
end
|
metadata
CHANGED
@@ -1,20 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
5
|
-
prerelease:
|
4
|
+
version: 1.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Miguel Vazquez
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2013-10-21 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rbbt-util
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
17
|
- - ! '>='
|
20
18
|
- !ruby/object:Gem::Version
|
@@ -22,7 +20,6 @@ dependencies:
|
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
24
|
- - ! '>='
|
28
25
|
- !ruby/object:Gem::Version
|
@@ -30,7 +27,6 @@ dependencies:
|
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: stemmer
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
31
|
- - ! '>='
|
36
32
|
- !ruby/object:Gem::Version
|
@@ -38,7 +34,6 @@ dependencies:
|
|
38
34
|
type: :runtime
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
38
|
- - ! '>='
|
44
39
|
- !ruby/object:Gem::Version
|
@@ -46,7 +41,6 @@ dependencies:
|
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: libxml-ruby
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
45
|
- - ! '>='
|
52
46
|
- !ruby/object:Gem::Version
|
@@ -54,7 +48,6 @@ dependencies:
|
|
54
48
|
type: :runtime
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
52
|
- - ! '>='
|
60
53
|
- !ruby/object:Gem::Version
|
@@ -62,7 +55,6 @@ dependencies:
|
|
62
55
|
- !ruby/object:Gem::Dependency
|
63
56
|
name: json
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
58
|
requirements:
|
67
59
|
- - ! '>='
|
68
60
|
- !ruby/object:Gem::Version
|
@@ -70,7 +62,6 @@ dependencies:
|
|
70
62
|
type: :runtime
|
71
63
|
prerelease: false
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
65
|
requirements:
|
75
66
|
- - ! '>='
|
76
67
|
- !ruby/object:Gem::Version
|
@@ -78,7 +69,6 @@ dependencies:
|
|
78
69
|
- !ruby/object:Gem::Dependency
|
79
70
|
name: rjb
|
80
71
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
72
|
requirements:
|
83
73
|
- - ! '>='
|
84
74
|
- !ruby/object:Gem::Version
|
@@ -86,7 +76,6 @@ dependencies:
|
|
86
76
|
type: :runtime
|
87
77
|
prerelease: false
|
88
78
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
79
|
requirements:
|
91
80
|
- - ! '>='
|
92
81
|
- !ruby/object:Gem::Version
|
@@ -146,75 +135,74 @@ files:
|
|
146
135
|
- share/rnorm/cue_default
|
147
136
|
- share/rnorm/tokens_default
|
148
137
|
- share/wordlists/stopwords
|
149
|
-
- test/
|
138
|
+
- test/rbbt/bow/test_misc.rb
|
150
139
|
- test/rbbt/bow/test_bow.rb
|
151
140
|
- test/rbbt/bow/test_dictionary.rb
|
152
|
-
- test/rbbt/
|
153
|
-
- test/rbbt/ner/test_regexpNER.rb
|
154
|
-
- test/rbbt/ner/test_abner.rb
|
155
|
-
- test/rbbt/ner/test_banner.rb
|
141
|
+
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
156
142
|
- test/rbbt/ner/test_NER.rb
|
157
|
-
- test/rbbt/ner/test_token_trieNER.rb
|
158
|
-
- test/rbbt/ner/test_patterns.rb
|
159
143
|
- test/rbbt/ner/segment/test_named_entity.rb
|
160
|
-
- test/rbbt/ner/segment/test_segmented.rb
|
161
144
|
- test/rbbt/ner/segment/test_transformed.rb
|
145
|
+
- test/rbbt/ner/segment/test_segmented.rb
|
146
|
+
- test/rbbt/ner/test_patterns.rb
|
162
147
|
- test/rbbt/ner/test_segment.rb
|
163
|
-
- test/rbbt/ner/test_rnorm.rb
|
164
|
-
- test/rbbt/ner/test_oscar4.rb
|
165
|
-
- test/rbbt/ner/test_chemical_tagger.rb
|
166
|
-
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
167
148
|
- test/rbbt/ner/test_finder.rb
|
149
|
+
- test/rbbt/ner/test_chemical_tagger.rb
|
150
|
+
- test/rbbt/ner/test_token_trieNER.rb
|
151
|
+
- test/rbbt/ner/test_oscar4.rb
|
152
|
+
- test/rbbt/ner/test_abner.rb
|
153
|
+
- test/rbbt/ner/test_regexpNER.rb
|
154
|
+
- test/rbbt/ner/test_rnorm.rb
|
155
|
+
- test/rbbt/ner/test_banner.rb
|
168
156
|
- test/rbbt/ner/test_linnaeus.rb
|
169
|
-
- test/rbbt/entity/test_document.rb
|
170
157
|
- test/rbbt/nlp/test_nlp.rb
|
171
158
|
- test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
|
159
|
+
- test/rbbt/entity/test_document.rb
|
160
|
+
- test/test_helper.rb
|
172
161
|
- bin/get_ppis.rb
|
173
162
|
homepage: http://github.com/mikisvaz/rbbt-util
|
174
163
|
licenses: []
|
164
|
+
metadata: {}
|
175
165
|
post_install_message:
|
176
166
|
rdoc_options: []
|
177
167
|
require_paths:
|
178
168
|
- lib
|
179
169
|
required_ruby_version: !ruby/object:Gem::Requirement
|
180
|
-
none: false
|
181
170
|
requirements:
|
182
171
|
- - ! '>='
|
183
172
|
- !ruby/object:Gem::Version
|
184
173
|
version: '0'
|
185
174
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
186
|
-
none: false
|
187
175
|
requirements:
|
188
176
|
- - ! '>='
|
189
177
|
- !ruby/object:Gem::Version
|
190
178
|
version: '0'
|
191
179
|
requirements: []
|
192
180
|
rubyforge_project:
|
193
|
-
rubygems_version:
|
181
|
+
rubygems_version: 2.0.3
|
194
182
|
signing_key:
|
195
|
-
specification_version:
|
183
|
+
specification_version: 4
|
196
184
|
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|
197
185
|
test_files:
|
198
|
-
- test/
|
186
|
+
- test/rbbt/bow/test_misc.rb
|
199
187
|
- test/rbbt/bow/test_bow.rb
|
200
188
|
- test/rbbt/bow/test_dictionary.rb
|
201
|
-
- test/rbbt/
|
202
|
-
- test/rbbt/ner/test_regexpNER.rb
|
203
|
-
- test/rbbt/ner/test_abner.rb
|
204
|
-
- test/rbbt/ner/test_banner.rb
|
189
|
+
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
205
190
|
- test/rbbt/ner/test_NER.rb
|
206
|
-
- test/rbbt/ner/test_token_trieNER.rb
|
207
|
-
- test/rbbt/ner/test_patterns.rb
|
208
191
|
- test/rbbt/ner/segment/test_named_entity.rb
|
209
|
-
- test/rbbt/ner/segment/test_segmented.rb
|
210
192
|
- test/rbbt/ner/segment/test_transformed.rb
|
193
|
+
- test/rbbt/ner/segment/test_segmented.rb
|
194
|
+
- test/rbbt/ner/test_patterns.rb
|
211
195
|
- test/rbbt/ner/test_segment.rb
|
212
|
-
- test/rbbt/ner/test_rnorm.rb
|
213
|
-
- test/rbbt/ner/test_oscar4.rb
|
214
|
-
- test/rbbt/ner/test_chemical_tagger.rb
|
215
|
-
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
216
196
|
- test/rbbt/ner/test_finder.rb
|
197
|
+
- test/rbbt/ner/test_chemical_tagger.rb
|
198
|
+
- test/rbbt/ner/test_token_trieNER.rb
|
199
|
+
- test/rbbt/ner/test_oscar4.rb
|
200
|
+
- test/rbbt/ner/test_abner.rb
|
201
|
+
- test/rbbt/ner/test_regexpNER.rb
|
202
|
+
- test/rbbt/ner/test_rnorm.rb
|
203
|
+
- test/rbbt/ner/test_banner.rb
|
217
204
|
- test/rbbt/ner/test_linnaeus.rb
|
218
|
-
- test/rbbt/entity/test_document.rb
|
219
205
|
- test/rbbt/nlp/test_nlp.rb
|
220
206
|
- test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
|
207
|
+
- test/rbbt/entity/test_document.rb
|
208
|
+
- test/test_helper.rb
|