rbbt-text 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/lib/rbbt/entity/document.rb +17 -26
- data/lib/rbbt/ner/abner.rb +1 -0
- data/lib/rbbt/ner/banner.rb +1 -0
- data/lib/rbbt/ner/chemical_tagger.rb +2 -0
- data/lib/rbbt/ner/finder.rb +6 -7
- data/lib/rbbt/ner/ngram_prefix_dictionary.rb +5 -7
- data/lib/rbbt/ner/oscar4.rb +8 -0
- data/lib/rbbt/ner/segment/docid.rb +1 -1
- data/lib/rbbt/ner/segment/named_entity.rb +14 -0
- data/lib/rbbt/ner/segment/transformed.rb +10 -10
- data/lib/rbbt/ner/segment.rb +13 -5
- data/lib/rbbt/nlp/genia/sentence_splitter.rb +2 -0
- data/lib/rbbt/nlp/open_nlp/sentence_splitter.rb +2 -0
- data/share/install/software/Geniass +1 -1
- data/share/install/software/OpenNLP +1 -1
- data/test/rbbt/entity/test_document.rb +9 -6
- data/test/rbbt/ner/test_finder.rb +5 -6
- data/test/rbbt/ner/test_ngram_prefix_dictionary.rb +0 -1
- data/test/rbbt/ner/test_oscar4.rb +1 -1
- metadata +31 -43
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
OGU0NTNiMzRjZjZiOTkwMjg1ZmJlNTU0NGY1MTM4YzhkZmYwOTZmZg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
NTI2YmFhMzI3OWU1NjQ3ZjZhMDUzMzc0N2VkOTAwMjMyNzAxOWVmNQ==
|
7
|
+
!binary "U0hBNTEy":
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
N2U5NGZmOWVjODdlYjY0N2NjMmVkNmVmODJhNTVjNzI1NTA3ZmYyY2E3ZWYx
|
10
|
+
MGU0MDlmOGNkODRiYjYyYmVlYzFjOTkwMGYyMTQ2NmIxMGExZmYxMWQxMjQy
|
11
|
+
OWM2MDZlZDQxZTRmOTJlMzMzMTk4MDlhM2YwOGNmYWQxNDAwY2E=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZjQyYTg2YTIzMmQyMjE5NjhlYjExZTllZGJlNjQ3ZGU3NTVlNTEyMWE0NjRl
|
14
|
+
YWQzMTUxNDVjNzQ0NWQ0YWQ5MWY2ZGI4MzJlNjI2ZWQ5OWI1OTI0MjllYzBj
|
15
|
+
NDIwYmM3OTBhNDBhMWIwYzk1Mjc3MTU0Mzc1MWE3Yzk3MDgyN2Q=
|
data/lib/rbbt/entity/document.rb
CHANGED
@@ -8,8 +8,6 @@ module Document
|
|
8
8
|
attr_accessor :corpus
|
9
9
|
end
|
10
10
|
|
11
|
-
attr_accessor :docid
|
12
|
-
|
13
11
|
property :docid => :single2array do |*args|
|
14
12
|
@docid ||= if self =~ /^text:/
|
15
13
|
self
|
@@ -19,10 +17,6 @@ module Document
|
|
19
17
|
@docid
|
20
18
|
end
|
21
19
|
|
22
|
-
#property :annotation_id => :single2array do |*args|
|
23
|
-
# docid(*args)
|
24
|
-
#end
|
25
|
-
|
26
20
|
property :annotation_id => :both do |*args|
|
27
21
|
if Array === self
|
28
22
|
Misc.hash2md5(info.merge(:self => self))
|
@@ -43,42 +37,39 @@ module Document
|
|
43
37
|
self._get_text(*args)
|
44
38
|
else
|
45
39
|
|
46
|
-
Document.corpus.
|
47
|
-
|
40
|
+
Document.corpus.read_and_close do
|
41
|
+
self.each do |doc|
|
42
|
+
id = doc.docid(*args)
|
43
|
+
case
|
44
|
+
when Document.corpus.include?(doc)
|
45
|
+
article_text[doc] = Document.corpus[doc]
|
46
|
+
when Document.corpus.include?(id)
|
47
|
+
article_text[doc] = Document.corpus[id]
|
48
|
+
else
|
49
|
+
missing << doc
|
50
|
+
end
|
48
51
|
|
49
|
-
case
|
50
|
-
when Document.corpus.include?(doc)
|
51
|
-
article_text[doc] = Document.corpus[doc]
|
52
|
-
when Document.corpus.include?(doc.docid(*args))
|
53
|
-
article_text[doc] = Document.corpus[doc.docid(*args)]
|
54
|
-
else
|
55
|
-
missing << doc
|
56
52
|
end
|
57
|
-
|
58
53
|
end
|
59
|
-
Document.corpus.close if Document.corpus.respond_to? :close
|
60
54
|
|
61
55
|
if missing.any?
|
62
56
|
missing.first.annotate missing
|
63
57
|
missing_text = Misc.process_to_hash(missing){|list| list._get_text(*args)}
|
64
58
|
|
65
59
|
Misc.lock(Document.corpus.respond_to?(:persistence_path) ? Document.corpus.persistence_path : nil) do
|
66
|
-
Document.corpus.
|
60
|
+
Document.corpus.write_and_close do
|
67
61
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
62
|
+
missing_text.each do |doc, doc_text|
|
63
|
+
doc = self.annotate doc.dup
|
64
|
+
Document.corpus[doc.docid(*args)] = doc_text
|
65
|
+
article_text[doc] = doc_text
|
66
|
+
end
|
72
67
|
end
|
73
|
-
|
74
|
-
Document.corpus.close if Document.corpus.respond_to? :close
|
75
68
|
end
|
76
|
-
|
77
69
|
end
|
78
70
|
|
79
71
|
article_text.values_at *self
|
80
72
|
end
|
81
73
|
end
|
82
|
-
|
83
74
|
end
|
84
75
|
|
data/lib/rbbt/ner/abner.rb
CHANGED
data/lib/rbbt/ner/banner.rb
CHANGED
@@ -57,6 +57,7 @@ class Banner < NER
|
|
57
57
|
text.gsub!(/\n/,' ')
|
58
58
|
text.gsub!(/\|/,'/') # Character | gives an error
|
59
59
|
return [] if text.strip.empty?
|
60
|
+
text = text.encode('utf-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '')
|
60
61
|
sentence = @@Sentence.new(text)
|
61
62
|
|
62
63
|
@tokenizer.tokenize(sentence)
|
@@ -8,6 +8,7 @@ class ChemicalTagger < NER
|
|
8
8
|
Rbbt.claim Rbbt.software.opt.ChemicalTagger, :install, Rbbt.share.install.software.ChemicalTagger.find
|
9
9
|
|
10
10
|
def self.init
|
11
|
+
ENV["CLASSPATH"] = ENV["CLASSPATH"].split(":").reverse * ":"
|
11
12
|
Rjb::load(nil, jvmargs = ['-Xms1G','-Xmx2G']) unless Rjb.loaded?
|
12
13
|
@@RbbtChemicalTagger ||= Rjb::import('RbbtChemicalTagger')
|
13
14
|
end
|
@@ -21,6 +22,7 @@ class ChemicalTagger < NER
|
|
21
22
|
matches = @@RbbtChemicalTagger.match(text)
|
22
23
|
rescue
|
23
24
|
Log.debug "ChemicalTagger Error: #{$!.message}"
|
25
|
+
ddd $!.backtrace
|
24
26
|
return []
|
25
27
|
end
|
26
28
|
|
data/lib/rbbt/ner/finder.rb
CHANGED
@@ -15,16 +15,17 @@ class Finder
|
|
15
15
|
class Instance
|
16
16
|
attr_accessor :namespace, :format, :normalizer
|
17
17
|
def initialize(path, open_options = {})
|
18
|
-
|
18
|
+
case path
|
19
|
+
when TSV
|
19
20
|
@namespace = path.namespace
|
20
21
|
@format = path.key_field
|
21
22
|
@normalizer = Normalizer.new(path)
|
22
23
|
else
|
23
24
|
open_options = Misc.add_defaults open_options, :type => :flat
|
24
|
-
|
25
|
-
@namespace =
|
26
|
-
@format =
|
27
|
-
@normalizer = Normalizer.new(
|
25
|
+
tsv = TSV.open(path, open_options)
|
26
|
+
@namespace = tsv.namespace
|
27
|
+
@format = tsv.key_field
|
28
|
+
@normalizer = Normalizer.new(tsv)
|
28
29
|
end
|
29
30
|
end
|
30
31
|
|
@@ -55,6 +56,4 @@ class Finder
|
|
55
56
|
acc += instance.find(name)
|
56
57
|
end
|
57
58
|
end
|
58
|
-
|
59
59
|
end
|
60
|
-
|
@@ -31,7 +31,7 @@ VALUE fast_start_with(VALUE str, VALUE cmp, int offset)
|
|
31
31
|
int length_cmp = RSTRING_LEN(cmp);
|
32
32
|
int length_str = RSTRING_LEN(str);
|
33
33
|
|
34
|
-
if (memcmp(RSTRING_PTR(str)+ offset, RSTRING_PTR(cmp), length_cmp) == 0){
|
34
|
+
if (memcmp(RSTRING_PTR(str) + offset, RSTRING_PTR(cmp), length_cmp) == 0){
|
35
35
|
if (length_cmp - offset == length_str || is_stop_letter(RSTRING_PTR(str)[offset + length_cmp]))
|
36
36
|
return Qtrue;
|
37
37
|
else
|
@@ -87,25 +87,23 @@ VALUE fast_start_with(VALUE str, VALUE cmp, int offset)
|
|
87
87
|
matches = []
|
88
88
|
|
89
89
|
text_offset = 0
|
90
|
+
text_chars = text.chars.to_a
|
90
91
|
text_length = text.length
|
91
92
|
while (not text_offset.nil?) and text_offset < text_length
|
92
93
|
if STOP_LETTER_CHAR_VALUES.include? text[text_offset]
|
93
94
|
text_offset += 1
|
94
95
|
next
|
95
96
|
end
|
96
|
-
ngram = text
|
97
|
+
ngram = text.slice(text_offset, 3).strip
|
98
|
+
text_byte_offset = text_offset == 0 ? 0 : text[0..text_offset-1].bytesize
|
97
99
|
|
98
100
|
found = nil
|
99
101
|
if index.include? ngram
|
100
|
-
|
101
102
|
diff = text_length - text_offset
|
102
103
|
# Match with entries
|
103
104
|
index[ngram].each do |name, code|
|
104
105
|
if name.length <= diff
|
105
|
-
|
106
|
-
# (text_offset + name.length == text_length or piece[name.length] == " "[0])
|
107
|
-
|
108
|
-
if fast_start_with(text, name, text_offset)
|
106
|
+
if fast_start_with(text, name, text_byte_offset)
|
109
107
|
found = [name.dup, code, text_offset]
|
110
108
|
break
|
111
109
|
end
|
data/lib/rbbt/ner/oscar4.rb
CHANGED
@@ -9,6 +9,12 @@ class OSCAR4 < NER
|
|
9
9
|
Rbbt.claim Rbbt.software.opt.OSCAR4, :install, Rbbt.share.install.software.OSCAR4.find
|
10
10
|
|
11
11
|
def self.init
|
12
|
+
|
13
|
+
# There is an incompatibility between the OpenNLP version in OSCAR4 and the
|
14
|
+
# one used for other matters in Rbbt, which is the most recent. We remove
|
15
|
+
# the standalone jars from the CLASSPATH
|
16
|
+
ENV["CLASSPATH"] = ENV["CLASSPATH"].split(":").select{|p| p !~ /opennlp/} * ":"
|
17
|
+
|
12
18
|
Rjb::load(nil, jvmargs = ['-Xms1G','-Xmx2G']) unless Rjb.loaded?
|
13
19
|
|
14
20
|
@@OSCAR ||= Rjb::import('uk.ac.cam.ch.wwmm.oscar.Oscar')
|
@@ -38,6 +44,8 @@ class OSCAR4 < NER
|
|
38
44
|
#inchi = inchi.getValue() unless inchi.nil?
|
39
45
|
inchi = nil
|
40
46
|
|
47
|
+
next unless entity.getType.toString == type unless type.nil?
|
48
|
+
|
41
49
|
NamedEntity.setup mention, entity.getStart, entity.getType, inchi, entity.getConfidence
|
42
50
|
|
43
51
|
result << mention
|
@@ -22,7 +22,7 @@ module SegmentWithDocid
|
|
22
22
|
def unmasked_text
|
23
23
|
return self unless masked?
|
24
24
|
tag, length = self.split(":")
|
25
|
-
Document.setup(docid).text[offset..(offset+length.to_i-1)]
|
25
|
+
Document.setup(docid).text[offset.to_i..(offset.to_i+length.to_i-1)]
|
26
26
|
end
|
27
27
|
|
28
28
|
def unmask
|
@@ -27,5 +27,19 @@ Score: #{score.inspect}
|
|
27
27
|
EOF
|
28
28
|
text.chomp
|
29
29
|
end
|
30
|
+
|
31
|
+
def entity(params = nil)
|
32
|
+
format, entity = code.split(":")
|
33
|
+
entity, format = format, nil if entity.nil?
|
34
|
+
|
35
|
+
if defined? Entity and Entity.formats.include? type or Entity.formats.include? format
|
36
|
+
params ||= {}
|
37
|
+
params[:format] = format if format and params[:format].nil?
|
38
|
+
mod = (Entity.formats[type] || Entity.format[entity])
|
39
|
+
mod.setup(entity, params)
|
40
|
+
end
|
41
|
+
|
42
|
+
entity
|
43
|
+
end
|
30
44
|
end
|
31
45
|
|
@@ -69,16 +69,16 @@ module Transformed
|
|
69
69
|
when (b.nil? or b.offset.nil?)
|
70
70
|
+1
|
71
71
|
# Non-overlap
|
72
|
-
when (a.end < b.offset or b.end < a.offset)
|
72
|
+
when (a.end < b.offset.to_i or b.end < a.offset.to_i)
|
73
73
|
b.offset <=> a.offset
|
74
74
|
# b includes a
|
75
|
-
when (a.offset >= b.offset and a.end <= b.end)
|
75
|
+
when (a.offset.to_i >= b.offset.to_i and a.end <= b.end)
|
76
76
|
-1
|
77
77
|
# b includes a
|
78
|
-
when (b.offset >= a.offset and b.end <= a.end)
|
78
|
+
when (b.offset.to_i >= a.offset.to_i and b.end <= a.end)
|
79
79
|
+1
|
80
80
|
# Overlap
|
81
|
-
when (a.offset > b.offset and a.end > b.end or b.offset < a.offset and b.end > a.end)
|
81
|
+
when (a.offset.to_i > b.offset.to_i and a.end > b.end or b.offset.to_i < a.offset.to_i and b.end > a.end)
|
82
82
|
a.length <=> b.length
|
83
83
|
else
|
84
84
|
raise "Unexpected case in sort: #{a.range} - #{b.range}"
|
@@ -99,8 +99,8 @@ module Transformed
|
|
99
99
|
|
100
100
|
shift_begin, shift_end = shift
|
101
101
|
|
102
|
-
text_offset = self.respond_to?(:offset)? self.offset : 0
|
103
|
-
updated_begin = segment.offset + shift_begin - text_offset
|
102
|
+
text_offset = self.respond_to?(:offset)? self.offset.to_i : 0
|
103
|
+
updated_begin = segment.offset.to_i + shift_begin - text_offset
|
104
104
|
updated_end = segment.range.last + shift_end - text_offset
|
105
105
|
|
106
106
|
updated_range = (updated_begin..updated_end)
|
@@ -136,11 +136,11 @@ module Transformed
|
|
136
136
|
# Before
|
137
137
|
when segment.end < range.begin
|
138
138
|
# After
|
139
|
-
when segment.offset > range.end + diff
|
140
|
-
segment.offset -= diff
|
139
|
+
when segment.offset.to_i > range.end + diff
|
140
|
+
segment.offset.to_i -= diff
|
141
141
|
# Includes
|
142
|
-
when (segment.offset <= range.begin and segment.end >= range.end + diff)
|
143
|
-
segment.replace self[segment.offset..segment.end - diff]
|
142
|
+
when (segment.offset.to_i <= range.begin and segment.end >= range.end + diff)
|
143
|
+
segment.replace self[segment.offset.to_i..segment.end - diff]
|
144
144
|
else
|
145
145
|
raise "Segment Overlaps"
|
146
146
|
end
|
data/lib/rbbt/ner/segment.rb
CHANGED
@@ -9,21 +9,24 @@ module Segment
|
|
9
9
|
@offset = offset.nil? ? nil : offset.to_i
|
10
10
|
end
|
11
11
|
|
12
|
-
|
13
12
|
def segment_length
|
14
|
-
|
13
|
+
begin
|
14
|
+
super()
|
15
|
+
rescue
|
16
|
+
self.length
|
17
|
+
end
|
15
18
|
end
|
16
19
|
|
17
20
|
#{{{ Ranges
|
18
21
|
|
19
22
|
def end
|
20
23
|
return nil if offset.nil?
|
21
|
-
offset + segment_length - 1
|
24
|
+
offset.to_i + segment_length - 1
|
22
25
|
end
|
23
26
|
|
24
27
|
def range
|
25
28
|
raise "No offset specified" if offset.nil?
|
26
|
-
(offset..self.end)
|
29
|
+
(offset.to_i..self.end)
|
27
30
|
end
|
28
31
|
|
29
32
|
def pull(offset)
|
@@ -68,6 +71,11 @@ module Segment
|
|
68
71
|
end
|
69
72
|
end
|
70
73
|
|
74
|
+
def includes?(segment)
|
75
|
+
(segment.offset.to_i >= self.offset.to_i) and
|
76
|
+
(segment.offset.to_i + segment.segment_length.to_i <= self.offset.to_i + self.segment_length.to_i)
|
77
|
+
end
|
78
|
+
|
71
79
|
#{{{ Sorting
|
72
80
|
|
73
81
|
def self.sort(segments, inline = true)
|
@@ -92,9 +100,9 @@ module Segment
|
|
92
100
|
end
|
93
101
|
|
94
102
|
def self.overlaps(sorted_segments)
|
95
|
-
|
96
103
|
last = nil
|
97
104
|
overlaped = []
|
105
|
+
|
98
106
|
sorted_segments.reverse.each do |segment|
|
99
107
|
overlaped << segment if (not last.nil?) and segment.range.end > last
|
100
108
|
last = segment.range.begin
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'rbbt/nlp/nlp'
|
1
2
|
require 'rbbt/ner/segment'
|
2
3
|
module NLP
|
3
4
|
def self.returnFeatures(prevWord, delimiter, nextWord)
|
@@ -168,6 +169,7 @@ module NLP
|
|
168
169
|
end
|
169
170
|
|
170
171
|
def self.geniass_sentence_splitter_extension(text)
|
172
|
+
Rbbt.software.opt.Geniass.produce
|
171
173
|
require Rbbt.software.opt.Geniass.ruby["Geniass.so"].find
|
172
174
|
geniass = Geniass.new
|
173
175
|
if not geniass.geniass_is_loaded
|
@@ -29,6 +29,7 @@ module OpenNLP
|
|
29
29
|
def self.sentence_splitter(text)
|
30
30
|
return [] if text.nil? or text.empty?
|
31
31
|
|
32
|
+
text = Misc.to_utf8(text)
|
32
33
|
last = 0
|
33
34
|
begin
|
34
35
|
sentence_split_detector = self.sentence_split_detector
|
@@ -61,6 +62,7 @@ module OpenNLP
|
|
61
62
|
end
|
62
63
|
|
63
64
|
sentences.collect{|sentence|
|
65
|
+
sentence = Misc.to_utf8(sentence)
|
64
66
|
start = text.index(sentence, last)
|
65
67
|
Segment.setup sentence, start
|
66
68
|
last = start + sentence.length - 1
|
@@ -1,12 +1,15 @@
|
|
1
1
|
require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.rb')
|
2
|
+
|
3
|
+
require 'rbbt/workflow'
|
2
4
|
require 'rbbt/entity'
|
5
|
+
Workflow.require_workflow "Genomics"
|
6
|
+
|
7
|
+
Workflow.require_workflow "TextMining"
|
8
|
+
|
3
9
|
require 'rbbt/entity/pmid'
|
4
10
|
require 'rbbt/entity/document'
|
5
11
|
require 'test/unit'
|
6
12
|
|
7
|
-
require 'rbbt/workflow'
|
8
|
-
|
9
|
-
Workflow.require_workflow "TextMining"
|
10
13
|
|
11
14
|
module Document
|
12
15
|
self.corpus = Persist.open_tokyocabinet("/tmp/corpus", false, :string, "BDB")
|
@@ -18,14 +21,14 @@ module Document
|
|
18
21
|
|
19
22
|
property :abner => :single do |*args|
|
20
23
|
normalize, organism = args
|
21
|
-
TextMining.job(:gene_mention_recognition, "Factoid", :text => text, :method => :
|
24
|
+
TextMining.job(:gene_mention_recognition, "Factoid", :text => text, :method => :abner, :normalize => normalize, :organism => organism).exec.each{|e| SegmentWithDocid.setup(e, self.docid)}
|
22
25
|
end
|
23
26
|
|
24
27
|
persist :abner, :annotations, :dir => Rbbt.tmp.test.find(:user).entity_property
|
25
28
|
end
|
26
29
|
|
27
30
|
class TestDocument < Test::Unit::TestCase
|
28
|
-
def
|
31
|
+
def _test_pmid
|
29
32
|
pmid = "21904853"
|
30
33
|
PMID.setup(pmid)
|
31
34
|
|
@@ -33,7 +36,7 @@ class TestDocument < Test::Unit::TestCase
|
|
33
36
|
assert_match /TET2/, pmid.text
|
34
37
|
end
|
35
38
|
|
36
|
-
def
|
39
|
+
def _test_abner
|
37
40
|
pmid = "21904853"
|
38
41
|
PMID.setup(pmid)
|
39
42
|
|
@@ -8,14 +8,13 @@ require 'rbbt/sources/NCI'
|
|
8
8
|
|
9
9
|
class TestFinder < Test::Unit::TestCase
|
10
10
|
|
11
|
-
def
|
12
|
-
|
13
|
-
|
14
|
-
assert_equal "Hsa/jun2011", f.instances.first.namespace
|
11
|
+
def test_namespace_and_format
|
12
|
+
f = Finder.new(CMD.cmd("head -n 1000", :in => Open.open(Organism.identifiers("Hsa/jun2011").produce.find)))
|
13
|
+
assert_equal "Hsa", f.instances.first.namespace
|
15
14
|
assert_equal "Ensembl Gene ID", f.instances.first.format
|
16
15
|
end
|
17
16
|
|
18
|
-
def
|
17
|
+
def test_find
|
19
18
|
f = Finder.new(Organism.lexicon("Hsa/jun2011"), :grep => ["SF3B1"])
|
20
19
|
|
21
20
|
assert_equal "ENSG00000115524", f.find("SF3B1").first
|
@@ -28,7 +27,7 @@ class TestFinder < Test::Unit::TestCase
|
|
28
27
|
def test_find
|
29
28
|
f = Finder.new(Organism.lexicon("Hsa/jun2011"), :grep => ["RASGRF2"])
|
30
29
|
|
31
|
-
|
30
|
+
assert f.find("RAS").include? "ENSG00000113319"
|
32
31
|
end
|
33
32
|
|
34
33
|
end
|
metadata
CHANGED
@@ -1,20 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
5
|
-
prerelease:
|
4
|
+
version: 1.1.0
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- Miguel Vazquez
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2013-10-21 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rbbt-util
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
17
|
- - ! '>='
|
20
18
|
- !ruby/object:Gem::Version
|
@@ -22,7 +20,6 @@ dependencies:
|
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
24
|
- - ! '>='
|
28
25
|
- !ruby/object:Gem::Version
|
@@ -30,7 +27,6 @@ dependencies:
|
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: stemmer
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
31
|
- - ! '>='
|
36
32
|
- !ruby/object:Gem::Version
|
@@ -38,7 +34,6 @@ dependencies:
|
|
38
34
|
type: :runtime
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
38
|
- - ! '>='
|
44
39
|
- !ruby/object:Gem::Version
|
@@ -46,7 +41,6 @@ dependencies:
|
|
46
41
|
- !ruby/object:Gem::Dependency
|
47
42
|
name: libxml-ruby
|
48
43
|
requirement: !ruby/object:Gem::Requirement
|
49
|
-
none: false
|
50
44
|
requirements:
|
51
45
|
- - ! '>='
|
52
46
|
- !ruby/object:Gem::Version
|
@@ -54,7 +48,6 @@ dependencies:
|
|
54
48
|
type: :runtime
|
55
49
|
prerelease: false
|
56
50
|
version_requirements: !ruby/object:Gem::Requirement
|
57
|
-
none: false
|
58
51
|
requirements:
|
59
52
|
- - ! '>='
|
60
53
|
- !ruby/object:Gem::Version
|
@@ -62,7 +55,6 @@ dependencies:
|
|
62
55
|
- !ruby/object:Gem::Dependency
|
63
56
|
name: json
|
64
57
|
requirement: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
58
|
requirements:
|
67
59
|
- - ! '>='
|
68
60
|
- !ruby/object:Gem::Version
|
@@ -70,7 +62,6 @@ dependencies:
|
|
70
62
|
type: :runtime
|
71
63
|
prerelease: false
|
72
64
|
version_requirements: !ruby/object:Gem::Requirement
|
73
|
-
none: false
|
74
65
|
requirements:
|
75
66
|
- - ! '>='
|
76
67
|
- !ruby/object:Gem::Version
|
@@ -78,7 +69,6 @@ dependencies:
|
|
78
69
|
- !ruby/object:Gem::Dependency
|
79
70
|
name: rjb
|
80
71
|
requirement: !ruby/object:Gem::Requirement
|
81
|
-
none: false
|
82
72
|
requirements:
|
83
73
|
- - ! '>='
|
84
74
|
- !ruby/object:Gem::Version
|
@@ -86,7 +76,6 @@ dependencies:
|
|
86
76
|
type: :runtime
|
87
77
|
prerelease: false
|
88
78
|
version_requirements: !ruby/object:Gem::Requirement
|
89
|
-
none: false
|
90
79
|
requirements:
|
91
80
|
- - ! '>='
|
92
81
|
- !ruby/object:Gem::Version
|
@@ -146,75 +135,74 @@ files:
|
|
146
135
|
- share/rnorm/cue_default
|
147
136
|
- share/rnorm/tokens_default
|
148
137
|
- share/wordlists/stopwords
|
149
|
-
- test/
|
138
|
+
- test/rbbt/bow/test_misc.rb
|
150
139
|
- test/rbbt/bow/test_bow.rb
|
151
140
|
- test/rbbt/bow/test_dictionary.rb
|
152
|
-
- test/rbbt/
|
153
|
-
- test/rbbt/ner/test_regexpNER.rb
|
154
|
-
- test/rbbt/ner/test_abner.rb
|
155
|
-
- test/rbbt/ner/test_banner.rb
|
141
|
+
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
156
142
|
- test/rbbt/ner/test_NER.rb
|
157
|
-
- test/rbbt/ner/test_token_trieNER.rb
|
158
|
-
- test/rbbt/ner/test_patterns.rb
|
159
143
|
- test/rbbt/ner/segment/test_named_entity.rb
|
160
|
-
- test/rbbt/ner/segment/test_segmented.rb
|
161
144
|
- test/rbbt/ner/segment/test_transformed.rb
|
145
|
+
- test/rbbt/ner/segment/test_segmented.rb
|
146
|
+
- test/rbbt/ner/test_patterns.rb
|
162
147
|
- test/rbbt/ner/test_segment.rb
|
163
|
-
- test/rbbt/ner/test_rnorm.rb
|
164
|
-
- test/rbbt/ner/test_oscar4.rb
|
165
|
-
- test/rbbt/ner/test_chemical_tagger.rb
|
166
|
-
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
167
148
|
- test/rbbt/ner/test_finder.rb
|
149
|
+
- test/rbbt/ner/test_chemical_tagger.rb
|
150
|
+
- test/rbbt/ner/test_token_trieNER.rb
|
151
|
+
- test/rbbt/ner/test_oscar4.rb
|
152
|
+
- test/rbbt/ner/test_abner.rb
|
153
|
+
- test/rbbt/ner/test_regexpNER.rb
|
154
|
+
- test/rbbt/ner/test_rnorm.rb
|
155
|
+
- test/rbbt/ner/test_banner.rb
|
168
156
|
- test/rbbt/ner/test_linnaeus.rb
|
169
|
-
- test/rbbt/entity/test_document.rb
|
170
157
|
- test/rbbt/nlp/test_nlp.rb
|
171
158
|
- test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
|
159
|
+
- test/rbbt/entity/test_document.rb
|
160
|
+
- test/test_helper.rb
|
172
161
|
- bin/get_ppis.rb
|
173
162
|
homepage: http://github.com/mikisvaz/rbbt-util
|
174
163
|
licenses: []
|
164
|
+
metadata: {}
|
175
165
|
post_install_message:
|
176
166
|
rdoc_options: []
|
177
167
|
require_paths:
|
178
168
|
- lib
|
179
169
|
required_ruby_version: !ruby/object:Gem::Requirement
|
180
|
-
none: false
|
181
170
|
requirements:
|
182
171
|
- - ! '>='
|
183
172
|
- !ruby/object:Gem::Version
|
184
173
|
version: '0'
|
185
174
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
186
|
-
none: false
|
187
175
|
requirements:
|
188
176
|
- - ! '>='
|
189
177
|
- !ruby/object:Gem::Version
|
190
178
|
version: '0'
|
191
179
|
requirements: []
|
192
180
|
rubyforge_project:
|
193
|
-
rubygems_version:
|
181
|
+
rubygems_version: 2.0.3
|
194
182
|
signing_key:
|
195
|
-
specification_version:
|
183
|
+
specification_version: 4
|
196
184
|
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|
197
185
|
test_files:
|
198
|
-
- test/
|
186
|
+
- test/rbbt/bow/test_misc.rb
|
199
187
|
- test/rbbt/bow/test_bow.rb
|
200
188
|
- test/rbbt/bow/test_dictionary.rb
|
201
|
-
- test/rbbt/
|
202
|
-
- test/rbbt/ner/test_regexpNER.rb
|
203
|
-
- test/rbbt/ner/test_abner.rb
|
204
|
-
- test/rbbt/ner/test_banner.rb
|
189
|
+
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
205
190
|
- test/rbbt/ner/test_NER.rb
|
206
|
-
- test/rbbt/ner/test_token_trieNER.rb
|
207
|
-
- test/rbbt/ner/test_patterns.rb
|
208
191
|
- test/rbbt/ner/segment/test_named_entity.rb
|
209
|
-
- test/rbbt/ner/segment/test_segmented.rb
|
210
192
|
- test/rbbt/ner/segment/test_transformed.rb
|
193
|
+
- test/rbbt/ner/segment/test_segmented.rb
|
194
|
+
- test/rbbt/ner/test_patterns.rb
|
211
195
|
- test/rbbt/ner/test_segment.rb
|
212
|
-
- test/rbbt/ner/test_rnorm.rb
|
213
|
-
- test/rbbt/ner/test_oscar4.rb
|
214
|
-
- test/rbbt/ner/test_chemical_tagger.rb
|
215
|
-
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
216
196
|
- test/rbbt/ner/test_finder.rb
|
197
|
+
- test/rbbt/ner/test_chemical_tagger.rb
|
198
|
+
- test/rbbt/ner/test_token_trieNER.rb
|
199
|
+
- test/rbbt/ner/test_oscar4.rb
|
200
|
+
- test/rbbt/ner/test_abner.rb
|
201
|
+
- test/rbbt/ner/test_regexpNER.rb
|
202
|
+
- test/rbbt/ner/test_rnorm.rb
|
203
|
+
- test/rbbt/ner/test_banner.rb
|
217
204
|
- test/rbbt/ner/test_linnaeus.rb
|
218
|
-
- test/rbbt/entity/test_document.rb
|
219
205
|
- test/rbbt/nlp/test_nlp.rb
|
220
206
|
- test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
|
207
|
+
- test/rbbt/entity/test_document.rb
|
208
|
+
- test/test_helper.rb
|