rbbt-text 1.1.3 → 1.1.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8e24d9496396b5679cdea2d9e4659fdc86bf9baf
4
- data.tar.gz: 97b1cafa958ee4e60bd66e64fc3803d2fa2bf972
3
+ metadata.gz: 571b05a78ab3a1bb51d37604cab69773555fd331
4
+ data.tar.gz: 61983fbd5d98c8412bb2fd36d0639eb7086f9287
5
5
  SHA512:
6
- metadata.gz: 21c91bf76cfb78e0c2a674a62c5c8e7971e981c631e74091524610f37351992d3b9ca5b197fcfccfe7d5315df5125cd1d6b5dfbc3b70566d9df7f9afe90d0a32
7
- data.tar.gz: dbfd6442800434cf7073591f27007392e003d9511f2e00d0bd6c12c6ad5fd22bdde4b9e4b729334ddf6957f64aba360a0b407f926fa0e8c7bbe84e2bb9318bcf
6
+ metadata.gz: 25f718d2733cbd5bb2bb521bdc4d50e7386ac39a573c2424897bad98b1f8674beeec56c7162bec91c5afb555163eea995bbd740f9d855d13fc084d6248eff4cd
7
+ data.tar.gz: 184f47a53ffcb220ba68faf03343f869eb65b0f12022561355c9cf417245781ebfc46efa81a49c1a9d9d746b9e42434bbe4e9dc49cc6d70eb216ecc6f0517b48
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/ner/rnorm'
2
+ require 'rbbt/ner/regexpNER'
2
3
 
3
4
  class Finder
4
5
 
@@ -20,6 +21,10 @@ class Finder
20
21
  @namespace = path.namespace
21
22
  @format = path.key_field
22
23
  @normalizer = Normalizer.new(path)
24
+ when Hash
25
+ @namespace = open_options[:namespace]
26
+ @format = nil
27
+ @normalizer = RegExpNER.new(path)
23
28
  else
24
29
  open_options = Misc.add_defaults open_options, :type => :flat
25
30
  tsv = TSV.open(path, open_options)
@@ -56,8 +56,8 @@ class RegExpNER < NER
56
56
  chunks = Segment.split(text, matches)
57
57
  chunks.each do |chunk|
58
58
  chunk_offset = chunk.offset
59
- match_regexp_list(chunk, regexp_list, type).collect do |match|
60
- match.offset += chunk_offset;
59
+ match_regexp_list(chunk, regexp_list, type).each do |match|
60
+ match.offset = match.offset + chunk_offset;
61
61
  matches << match
62
62
  end
63
63
  end
@@ -68,9 +68,12 @@ class RegExpNER < NER
68
68
 
69
69
  attr_accessor :regexps
70
70
  def initialize(regexps = {})
71
- @regexps = regexps.collect
71
+ @regexps = regexps.collect{|p| p }
72
72
  end
73
73
 
74
+ def token_score(*args)
75
+ 1
76
+ end
74
77
 
75
78
  def __define_regexp_hook(name, regexp, *args)
76
79
  @regexps << [name, regexp]
@@ -86,7 +89,9 @@ class RegExpNER < NER
86
89
 
87
90
  def match(text)
88
91
  matches = RegExpNER.match_regexp_hash(text, @regexps)
89
- matches
92
+ matches.collect do |m|
93
+ NamedEntity.setup(m, :offset => m.offset, :type => m.type, :code => m)
94
+ end
90
95
  end
91
96
 
92
97
  end
@@ -5,10 +5,6 @@ module Segment
5
5
  extend Annotation
6
6
  self.annotation :offset
7
7
 
8
- def offset=(offset)
9
- @offset = offset.nil? ? nil : offset.to_i
10
- end
11
-
12
8
  def segment_length
13
9
  begin
14
10
  super()
@@ -10,7 +10,7 @@ class TestFinder < Test::Unit::TestCase
10
10
 
11
11
  def test_namespace_and_format
12
12
  f = Finder.new(CMD.cmd("head -n 1000", :in => Open.open(Organism.identifiers(Organism.default_code("Hsa")).produce.find)))
13
- assert_equal "Hsa", f.instances.first.namespace
13
+ assert_equal Organism.default_code("Hsa"), f.instances.first.namespace
14
14
  assert_equal "Ensembl Gene ID", f.instances.first.format
15
15
  end
16
16
 
@@ -19,15 +19,24 @@ class TestFinder < Test::Unit::TestCase
19
19
 
20
20
  assert_equal "ENSG00000115524", f.find("SF3B1").first
21
21
  if defined? Entity
22
- ddd f.find("SF3B1").first.info
23
22
  assert_equal "Ensembl Gene ID", f.find("SF3B1").first.format
24
23
  end
25
24
  end
26
25
 
27
- def test_find
26
+ def test_find2
28
27
  f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["RASGRF2"])
29
28
 
29
+ m = f.find("RAS").first
30
+
30
31
  assert f.find("RAS").include? "ENSG00000113319"
31
32
  end
32
33
 
34
+ def test_find_mutation
35
+ f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["RASGRF2"])
36
+ f.add_instance({"Genomic Mutation" => /\w+:\w+:\w+/})
37
+
38
+ m = f.find("The mutation 1:1234:A is intergenic").first
39
+ assert_equal "1:1234:A", m.info[:code]
40
+ end
41
+
33
42
  end
@@ -79,6 +79,7 @@ class TestRegExpNER < Test::Unit::TestCase
79
79
  assert_equal :should, matches.select{|m| m.type == :should}[0].type
80
80
  end
81
81
 
82
+
82
83
  def test_regexp_order
83
84
  text =<<-EOF
84
85
  * Human AUC 0-24h= 7591 ng.h/ml at 30 mg/day In mice, dietary administration of aripiprazole at doses of 1, 3, and 10 asdf mg/kg/day for 104 weeks was
@@ -20,8 +20,8 @@ class Test::Unit::TestCase
20
20
 
21
21
  def teardown
22
22
  FileUtils.rm_rf Rbbt.tmp.test.find :user
23
- Persist::TC_CONNECTIONS.values.each do |c| c.close end
24
- Persist::TC_CONNECTIONS.clear
23
+ Persist::CONNECTIONS.values.each do |c| c.close end
24
+ Persist::CONNECTIONS.clear
25
25
  DocumentRepo::TC_CONNECTIONS.values.each do |c| c.close end
26
26
  DocumentRepo::TC_CONNECTIONS.clear
27
27
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.3
4
+ version: 1.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-08-26 00:00:00.000000000 Z
11
+ date: 2015-07-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -178,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
178
178
  version: '0'
179
179
  requirements: []
180
180
  rubyforge_project:
181
- rubygems_version: 2.2.2
181
+ rubygems_version: 2.4.6
182
182
  signing_key:
183
183
  specification_version: 4
184
184
  summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)