rbbt-text 1.1.3 → 1.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/ner/finder.rb +5 -0
- data/lib/rbbt/ner/regexpNER.rb +9 -4
- data/lib/rbbt/ner/segment.rb +0 -4
- data/test/rbbt/ner/test_finder.rb +12 -3
- data/test/rbbt/ner/test_regexpNER.rb +1 -0
- data/test/test_helper.rb +2 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 571b05a78ab3a1bb51d37604cab69773555fd331
|
4
|
+
data.tar.gz: 61983fbd5d98c8412bb2fd36d0639eb7086f9287
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 25f718d2733cbd5bb2bb521bdc4d50e7386ac39a573c2424897bad98b1f8674beeec56c7162bec91c5afb555163eea995bbd740f9d855d13fc084d6248eff4cd
|
7
|
+
data.tar.gz: 184f47a53ffcb220ba68faf03343f869eb65b0f12022561355c9cf417245781ebfc46efa81a49c1a9d9d746b9e42434bbe4e9dc49cc6d70eb216ecc6f0517b48
|
data/lib/rbbt/ner/finder.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'rbbt/ner/rnorm'
|
2
|
+
require 'rbbt/ner/regexpNER'
|
2
3
|
|
3
4
|
class Finder
|
4
5
|
|
@@ -20,6 +21,10 @@ class Finder
|
|
20
21
|
@namespace = path.namespace
|
21
22
|
@format = path.key_field
|
22
23
|
@normalizer = Normalizer.new(path)
|
24
|
+
when Hash
|
25
|
+
@namespace = open_options[:namespace]
|
26
|
+
@format = nil
|
27
|
+
@normalizer = RegExpNER.new(path)
|
23
28
|
else
|
24
29
|
open_options = Misc.add_defaults open_options, :type => :flat
|
25
30
|
tsv = TSV.open(path, open_options)
|
data/lib/rbbt/ner/regexpNER.rb
CHANGED
@@ -56,8 +56,8 @@ class RegExpNER < NER
|
|
56
56
|
chunks = Segment.split(text, matches)
|
57
57
|
chunks.each do |chunk|
|
58
58
|
chunk_offset = chunk.offset
|
59
|
-
match_regexp_list(chunk, regexp_list, type).
|
60
|
-
match.offset
|
59
|
+
match_regexp_list(chunk, regexp_list, type).each do |match|
|
60
|
+
match.offset = match.offset + chunk_offset;
|
61
61
|
matches << match
|
62
62
|
end
|
63
63
|
end
|
@@ -68,9 +68,12 @@ class RegExpNER < NER
|
|
68
68
|
|
69
69
|
attr_accessor :regexps
|
70
70
|
def initialize(regexps = {})
|
71
|
-
@regexps = regexps.collect
|
71
|
+
@regexps = regexps.collect{|p| p }
|
72
72
|
end
|
73
73
|
|
74
|
+
def token_score(*args)
|
75
|
+
1
|
76
|
+
end
|
74
77
|
|
75
78
|
def __define_regexp_hook(name, regexp, *args)
|
76
79
|
@regexps << [name, regexp]
|
@@ -86,7 +89,9 @@ class RegExpNER < NER
|
|
86
89
|
|
87
90
|
def match(text)
|
88
91
|
matches = RegExpNER.match_regexp_hash(text, @regexps)
|
89
|
-
matches
|
92
|
+
matches.collect do |m|
|
93
|
+
NamedEntity.setup(m, :offset => m.offset, :type => m.type, :code => m)
|
94
|
+
end
|
90
95
|
end
|
91
96
|
|
92
97
|
end
|
data/lib/rbbt/ner/segment.rb
CHANGED
data/test/rbbt/ner/test_finder.rb
CHANGED
@@ -10,7 +10,7 @@ class TestFinder < Test::Unit::TestCase
|
|
10
10
|
|
11
11
|
def test_namespace_and_format
|
12
12
|
f = Finder.new(CMD.cmd("head -n 1000", :in => Open.open(Organism.identifiers(Organism.default_code("Hsa")).produce.find)))
|
13
|
-
assert_equal "Hsa", f.instances.first.namespace
|
13
|
+
assert_equal Organism.default_code("Hsa"), f.instances.first.namespace
|
14
14
|
assert_equal "Ensembl Gene ID", f.instances.first.format
|
15
15
|
end
|
16
16
|
|
@@ -19,15 +19,24 @@ class TestFinder < Test::Unit::TestCase
|
|
19
19
|
|
20
20
|
assert_equal "ENSG00000115524", f.find("SF3B1").first
|
21
21
|
if defined? Entity
|
22
|
-
ddd f.find("SF3B1").first.info
|
23
22
|
assert_equal "Ensembl Gene ID", f.find("SF3B1").first.format
|
24
23
|
end
|
25
24
|
end
|
26
25
|
|
27
|
-
def
|
26
|
+
def test_find2
|
28
27
|
f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["RASGRF2"])
|
29
28
|
|
29
|
+
m = f.find("RAS").first
|
30
|
+
|
30
31
|
assert f.find("RAS").include? "ENSG00000113319"
|
31
32
|
end
|
32
33
|
|
34
|
+
def test_find_mutation
|
35
|
+
f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["RASGRF2"])
|
36
|
+
f.add_instance({"Genomic Mutation" => /\w+:\w+:\w+/})
|
37
|
+
|
38
|
+
m = f.find("The mutation 1:1234:A is intergenic").first
|
39
|
+
assert_equal "1:1234:A", m.info[:code]
|
40
|
+
end
|
41
|
+
|
33
42
|
end
|
data/test/rbbt/ner/test_regexpNER.rb
CHANGED
@@ -79,6 +79,7 @@ class TestRegExpNER < Test::Unit::TestCase
|
|
79
79
|
assert_equal :should, matches.select{|m| m.type == :should}[0].type
|
80
80
|
end
|
81
81
|
|
82
|
+
|
82
83
|
def test_regexp_order
|
83
84
|
text =<<-EOF
|
84
85
|
* Human AUC 0-24h= 7591 ng.h/ml at 30 mg/day In mice, dietary administration of aripiprazole at doses of 1, 3, and 10 asdf mg/kg/day for 104 weeks was
|
data/test/test_helper.rb
CHANGED
@@ -20,8 +20,8 @@ class Test::Unit::TestCase
|
|
20
20
|
|
21
21
|
def teardown
|
22
22
|
FileUtils.rm_rf Rbbt.tmp.test.find :user
|
23
|
-
Persist::
|
24
|
-
Persist::
|
23
|
+
Persist::CONNECTIONS.values.each do |c| c.close end
|
24
|
+
Persist::CONNECTIONS.clear
|
25
25
|
DocumentRepo::TC_CONNECTIONS.values.each do |c| c.close end
|
26
26
|
DocumentRepo::TC_CONNECTIONS.clear
|
27
27
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.3
|
4
|
+
version: 1.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-07-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -178,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
178
178
|
version: '0'
|
179
179
|
requirements: []
|
180
180
|
rubyforge_project:
|
181
|
-
rubygems_version: 2.
|
181
|
+
rubygems_version: 2.4.6
|
182
182
|
signing_key:
|
183
183
|
specification_version: 4
|
184
184
|
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|