rbbt-text 1.1.3 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/ner/finder.rb +5 -0
- data/lib/rbbt/ner/regexpNER.rb +9 -4
- data/lib/rbbt/ner/segment.rb +0 -4
- data/test/rbbt/ner/test_finder.rb +12 -3
- data/test/rbbt/ner/test_regexpNER.rb +1 -0
- data/test/test_helper.rb +2 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 571b05a78ab3a1bb51d37604cab69773555fd331
|
4
|
+
data.tar.gz: 61983fbd5d98c8412bb2fd36d0639eb7086f9287
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 25f718d2733cbd5bb2bb521bdc4d50e7386ac39a573c2424897bad98b1f8674beeec56c7162bec91c5afb555163eea995bbd740f9d855d13fc084d6248eff4cd
|
7
|
+
data.tar.gz: 184f47a53ffcb220ba68faf03343f869eb65b0f12022561355c9cf417245781ebfc46efa81a49c1a9d9d746b9e42434bbe4e9dc49cc6d70eb216ecc6f0517b48
|
data/lib/rbbt/ner/finder.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'rbbt/ner/rnorm'
|
2
|
+
require 'rbbt/ner/regexpNER'
|
2
3
|
|
3
4
|
class Finder
|
4
5
|
|
@@ -20,6 +21,10 @@ class Finder
|
|
20
21
|
@namespace = path.namespace
|
21
22
|
@format = path.key_field
|
22
23
|
@normalizer = Normalizer.new(path)
|
24
|
+
when Hash
|
25
|
+
@namespace = open_options[:namespace]
|
26
|
+
@format = nil
|
27
|
+
@normalizer = RegExpNER.new(path)
|
23
28
|
else
|
24
29
|
open_options = Misc.add_defaults open_options, :type => :flat
|
25
30
|
tsv = TSV.open(path, open_options)
|
data/lib/rbbt/ner/regexpNER.rb
CHANGED
@@ -56,8 +56,8 @@ class RegExpNER < NER
|
|
56
56
|
chunks = Segment.split(text, matches)
|
57
57
|
chunks.each do |chunk|
|
58
58
|
chunk_offset = chunk.offset
|
59
|
-
match_regexp_list(chunk, regexp_list, type).
|
60
|
-
match.offset
|
59
|
+
match_regexp_list(chunk, regexp_list, type).each do |match|
|
60
|
+
match.offset = match.offset + chunk_offset;
|
61
61
|
matches << match
|
62
62
|
end
|
63
63
|
end
|
@@ -68,9 +68,12 @@ class RegExpNER < NER
|
|
68
68
|
|
69
69
|
attr_accessor :regexps
|
70
70
|
def initialize(regexps = {})
|
71
|
-
@regexps = regexps.collect
|
71
|
+
@regexps = regexps.collect{|p| p }
|
72
72
|
end
|
73
73
|
|
74
|
+
def token_score(*args)
|
75
|
+
1
|
76
|
+
end
|
74
77
|
|
75
78
|
def __define_regexp_hook(name, regexp, *args)
|
76
79
|
@regexps << [name, regexp]
|
@@ -86,7 +89,9 @@ class RegExpNER < NER
|
|
86
89
|
|
87
90
|
def match(text)
|
88
91
|
matches = RegExpNER.match_regexp_hash(text, @regexps)
|
89
|
-
matches
|
92
|
+
matches.collect do |m|
|
93
|
+
NamedEntity.setup(m, :offset => m.offset, :type => m.type, :code => m)
|
94
|
+
end
|
90
95
|
end
|
91
96
|
|
92
97
|
end
|
data/lib/rbbt/ner/segment.rb
CHANGED
(hunk not shown: 4 lines removed)
data/test/rbbt/ner/test_finder.rb
CHANGED
@@ -10,7 +10,7 @@ class TestFinder < Test::Unit::TestCase
|
|
10
10
|
|
11
11
|
def test_namespace_and_format
|
12
12
|
f = Finder.new(CMD.cmd("head -n 1000", :in => Open.open(Organism.identifiers(Organism.default_code("Hsa")).produce.find)))
|
13
|
-
assert_equal "Hsa", f.instances.first.namespace
|
13
|
+
assert_equal Organism.default_code("Hsa"), f.instances.first.namespace
|
14
14
|
assert_equal "Ensembl Gene ID", f.instances.first.format
|
15
15
|
end
|
16
16
|
|
@@ -19,15 +19,24 @@ class TestFinder < Test::Unit::TestCase
|
|
19
19
|
|
20
20
|
assert_equal "ENSG00000115524", f.find("SF3B1").first
|
21
21
|
if defined? Entity
|
22
|
-
ddd f.find("SF3B1").first.info
|
23
22
|
assert_equal "Ensembl Gene ID", f.find("SF3B1").first.format
|
24
23
|
end
|
25
24
|
end
|
26
25
|
|
27
|
-
def
|
26
|
+
def test_find2
|
28
27
|
f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["RASGRF2"])
|
29
28
|
|
29
|
+
m = f.find("RAS").first
|
30
|
+
|
30
31
|
assert f.find("RAS").include? "ENSG00000113319"
|
31
32
|
end
|
32
33
|
|
34
|
+
def test_find_mutation
|
35
|
+
f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["RASGRF2"])
|
36
|
+
f.add_instance({"Genomic Mutation" => /\w+:\w+:\w+/})
|
37
|
+
|
38
|
+
m = f.find("The mutation 1:1234:A is intergenic").first
|
39
|
+
assert_equal "1:1234:A", m.info[:code]
|
40
|
+
end
|
41
|
+
|
33
42
|
end
|
data/test/rbbt/ner/test_regexpNER.rb
CHANGED
@@ -79,6 +79,7 @@ class TestRegExpNER < Test::Unit::TestCase
|
|
79
79
|
assert_equal :should, matches.select{|m| m.type == :should}[0].type
|
80
80
|
end
|
81
81
|
|
82
|
+
|
82
83
|
def test_regexp_order
|
83
84
|
text =<<-EOF
|
84
85
|
* Human AUC 0-24h= 7591 ng.h/ml at 30 mg/day In mice, dietary administration of aripiprazole at doses of 1, 3, and 10 asdf mg/kg/day for 104 weeks was
|
data/test/test_helper.rb
CHANGED
@@ -20,8 +20,8 @@ class Test::Unit::TestCase
|
|
20
20
|
|
21
21
|
def teardown
|
22
22
|
FileUtils.rm_rf Rbbt.tmp.test.find :user
|
23
|
-
Persist::
|
24
|
-
Persist::
|
23
|
+
Persist::CONNECTIONS.values.each do |c| c.close end
|
24
|
+
Persist::CONNECTIONS.clear
|
25
25
|
DocumentRepo::TC_CONNECTIONS.values.each do |c| c.close end
|
26
26
|
DocumentRepo::TC_CONNECTIONS.clear
|
27
27
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.3
|
4
|
+
version: 1.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2015-07-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -178,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
178
178
|
version: '0'
|
179
179
|
requirements: []
|
180
180
|
rubyforge_project:
|
181
|
-
rubygems_version: 2.
|
181
|
+
rubygems_version: 2.4.6
|
182
182
|
signing_key:
|
183
183
|
specification_version: 4
|
184
184
|
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|