rbbt-text 1.1.3 → 1.1.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8e24d9496396b5679cdea2d9e4659fdc86bf9baf
4
- data.tar.gz: 97b1cafa958ee4e60bd66e64fc3803d2fa2bf972
3
+ metadata.gz: 571b05a78ab3a1bb51d37604cab69773555fd331
4
+ data.tar.gz: 61983fbd5d98c8412bb2fd36d0639eb7086f9287
5
5
  SHA512:
6
- metadata.gz: 21c91bf76cfb78e0c2a674a62c5c8e7971e981c631e74091524610f37351992d3b9ca5b197fcfccfe7d5315df5125cd1d6b5dfbc3b70566d9df7f9afe90d0a32
7
- data.tar.gz: dbfd6442800434cf7073591f27007392e003d9511f2e00d0bd6c12c6ad5fd22bdde4b9e4b729334ddf6957f64aba360a0b407f926fa0e8c7bbe84e2bb9318bcf
6
+ metadata.gz: 25f718d2733cbd5bb2bb521bdc4d50e7386ac39a573c2424897bad98b1f8674beeec56c7162bec91c5afb555163eea995bbd740f9d855d13fc084d6248eff4cd
7
+ data.tar.gz: 184f47a53ffcb220ba68faf03343f869eb65b0f12022561355c9cf417245781ebfc46efa81a49c1a9d9d746b9e42434bbe4e9dc49cc6d70eb216ecc6f0517b48
@@ -1,4 +1,5 @@
1
1
  require 'rbbt/ner/rnorm'
2
+ require 'rbbt/ner/regexpNER'
2
3
 
3
4
  class Finder
4
5
 
@@ -20,6 +21,10 @@ class Finder
20
21
  @namespace = path.namespace
21
22
  @format = path.key_field
22
23
  @normalizer = Normalizer.new(path)
24
+ when Hash
25
+ @namespace = open_options[:namespace]
26
+ @format = nil
27
+ @normalizer = RegExpNER.new(path)
23
28
  else
24
29
  open_options = Misc.add_defaults open_options, :type => :flat
25
30
  tsv = TSV.open(path, open_options)
@@ -56,8 +56,8 @@ class RegExpNER < NER
56
56
  chunks = Segment.split(text, matches)
57
57
  chunks.each do |chunk|
58
58
  chunk_offset = chunk.offset
59
- match_regexp_list(chunk, regexp_list, type).collect do |match|
60
- match.offset += chunk_offset;
59
+ match_regexp_list(chunk, regexp_list, type).each do |match|
60
+ match.offset = match.offset + chunk_offset;
61
61
  matches << match
62
62
  end
63
63
  end
@@ -68,9 +68,12 @@ class RegExpNER < NER
68
68
 
69
69
  attr_accessor :regexps
70
70
  def initialize(regexps = {})
71
- @regexps = regexps.collect
71
+ @regexps = regexps.collect{|p| p }
72
72
  end
73
73
 
74
+ def token_score(*args)
75
+ 1
76
+ end
74
77
 
75
78
  def __define_regexp_hook(name, regexp, *args)
76
79
  @regexps << [name, regexp]
@@ -86,7 +89,9 @@ class RegExpNER < NER
86
89
 
87
90
  def match(text)
88
91
  matches = RegExpNER.match_regexp_hash(text, @regexps)
89
- matches
92
+ matches.collect do |m|
93
+ NamedEntity.setup(m, :offset => m.offset, :type => m.type, :code => m)
94
+ end
90
95
  end
91
96
 
92
97
  end
@@ -5,10 +5,6 @@ module Segment
5
5
  extend Annotation
6
6
  self.annotation :offset
7
7
 
8
- def offset=(offset)
9
- @offset = offset.nil? ? nil : offset.to_i
10
- end
11
-
12
8
  def segment_length
13
9
  begin
14
10
  super()
@@ -10,7 +10,7 @@ class TestFinder < Test::Unit::TestCase
10
10
 
11
11
  def test_namespace_and_format
12
12
  f = Finder.new(CMD.cmd("head -n 1000", :in => Open.open(Organism.identifiers(Organism.default_code("Hsa")).produce.find)))
13
- assert_equal "Hsa", f.instances.first.namespace
13
+ assert_equal Organism.default_code("Hsa"), f.instances.first.namespace
14
14
  assert_equal "Ensembl Gene ID", f.instances.first.format
15
15
  end
16
16
 
@@ -19,15 +19,24 @@ class TestFinder < Test::Unit::TestCase
19
19
 
20
20
  assert_equal "ENSG00000115524", f.find("SF3B1").first
21
21
  if defined? Entity
22
- ddd f.find("SF3B1").first.info
23
22
  assert_equal "Ensembl Gene ID", f.find("SF3B1").first.format
24
23
  end
25
24
  end
26
25
 
27
- def test_find
26
+ def test_find2
28
27
  f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["RASGRF2"])
29
28
 
29
+ m = f.find("RAS").first
30
+
30
31
  assert f.find("RAS").include? "ENSG00000113319"
31
32
  end
32
33
 
34
+ def test_find_mutation
35
+ f = Finder.new(Organism.lexicon(Organism.default_code("Hsa")), :grep => ["RASGRF2"])
36
+ f.add_instance({"Genomic Mutation" => /\w+:\w+:\w+/})
37
+
38
+ m = f.find("The mutation 1:1234:A is intergenic").first
39
+ assert_equal "1:1234:A", m.info[:code]
40
+ end
41
+
33
42
  end
@@ -79,6 +79,7 @@ class TestRegExpNER < Test::Unit::TestCase
79
79
  assert_equal :should, matches.select{|m| m.type == :should}[0].type
80
80
  end
81
81
 
82
+
82
83
  def test_regexp_order
83
84
  text =<<-EOF
84
85
  * Human AUC 0-24h= 7591 ng.h/ml at 30 mg/day In mice, dietary administration of aripiprazole at doses of 1, 3, and 10 asdf mg/kg/day for 104 weeks was
@@ -20,8 +20,8 @@ class Test::Unit::TestCase
20
20
 
21
21
  def teardown
22
22
  FileUtils.rm_rf Rbbt.tmp.test.find :user
23
- Persist::TC_CONNECTIONS.values.each do |c| c.close end
24
- Persist::TC_CONNECTIONS.clear
23
+ Persist::CONNECTIONS.values.each do |c| c.close end
24
+ Persist::CONNECTIONS.clear
25
25
  DocumentRepo::TC_CONNECTIONS.values.each do |c| c.close end
26
26
  DocumentRepo::TC_CONNECTIONS.clear
27
27
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.3
4
+ version: 1.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-08-26 00:00:00.000000000 Z
11
+ date: 2015-07-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -178,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
178
178
  version: '0'
179
179
  requirements: []
180
180
  rubyforge_project:
181
- rubygems_version: 2.2.2
181
+ rubygems_version: 2.4.6
182
182
  signing_key:
183
183
  specification_version: 4
184
184
  summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)