rbbt-text 0.6.2 → 0.6.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -90,6 +90,7 @@ class Dictionary::TF_IDF
90
90
  @terms[term].to_f / num_docs * Math::log(1.0/df_value)
91
91
  ]
92
92
  }
93
+
93
94
  if limit
94
95
  Hash[*best.sort{|a,b| b[1] <=> a[1]}.slice(0, limit).flatten]
95
96
  else
@@ -148,7 +148,9 @@ class Document
148
148
  fields = data.fields if fields.nil? and data.respond_to? :fields
149
149
 
150
150
 
151
- data.filter
151
+ if data.respond_to? :persistence_path and String === data.persistence_path
152
+ data.filter(data.persistence_path + '.filters')
153
+ end
152
154
  data.add_filter("field:#{ doc_field }", @docid)
153
155
  data.add_filter("field:#{ entity_field }", "#{ entity }")
154
156
  keys = data.keys
@@ -157,7 +159,7 @@ class Document
157
159
 
158
160
  if keys.empty?
159
161
  segments = produce_#{entity}
160
- segments << Segment.setup("No #{entity} found in document #{ @docid }", -1) if segments.empty?
162
+ segments << Segment.setup("No #{entity} found in document " + @docid.to_s, -1) if segments.empty?
161
163
  tsv = Segment.tsv(segments, *fields.reject{|f| ["#{doc_field}", "#{entity_field}", "Start", "End", "annotation_types"].include? f})
162
164
 
163
165
  tsv.add_field "#{ doc_field }" do
@@ -178,6 +180,7 @@ class Document
178
180
  data.pop_filter
179
181
  data.pop_filter
180
182
  data.read
183
+
181
184
  else
182
185
  if raw == :check
183
186
  data.close
@@ -99,7 +99,8 @@ module DocumentRepo
99
99
  end
100
100
 
101
101
  def add(text, namespace, id, type, hash)
102
- write
102
+ read
103
+ write unless write?
103
104
  docid = fields2docid(namespace, id, type, hash)
104
105
  self[docid] = text unless self.include? docid
105
106
  read
@@ -0,0 +1,40 @@
1
+ require 'rbbt/entity'
2
+
3
+ module Document
4
+ extend Entity
5
+
6
+ class << self
7
+ attr_accessor :corpus
8
+ end
9
+
10
+ property :text => :array2single do
11
+ article_text = {}
12
+ missing = []
13
+
14
+ self.each do |doc|
15
+ Document.corpus.read if Document.corpus.respond_to? :read
16
+ if Document.corpus.include?(doc)
17
+ article_text[doc] = Document.corpus[doc]
18
+ else
19
+ missing << doc
20
+ end
21
+ end
22
+
23
+ if missing.any?
24
+ missing.first.annotate missing
25
+ missing_text = Misc.process_to_hash(missing){|list| list._get_text}
26
+
27
+ Misc.lock Document.corpus.persistence_path do
28
+ Document.corpus.write if Document.corpus.respond_to? :write
29
+ missing_text.each do |doc, text|
30
+ article_text[doc] = text
31
+ Document.corpus[doc] = text
32
+ end
33
+ Document.corpus.read if Document.corpus.respond_to? :read
34
+ end
35
+ end
36
+
37
+ article_text.values_at *self
38
+ end
39
+
40
+ end
@@ -5,6 +5,10 @@ module Segment
5
5
  extend Annotation
6
6
  self.annotation :offset
7
7
 
8
+ def offset=(offset)
9
+ @offset = offset.nil? ? nil : offset.to_i
10
+ end
11
+
8
12
  #{{{ Ranges
9
13
 
10
14
  def end
@@ -297,8 +301,11 @@ module Segment
297
301
  end
298
302
 
299
303
  def self.load_tsv(tsv)
300
- tsv.collect do |id, values|
301
- Annotated.load_tsv_values(id, values, tsv.fields)
304
+ fields = tsv.fields
305
+ tsv.with_unnamed do
306
+ tsv.collect do |id, values|
307
+ Annotated.load_tsv_values(id, values, fields)
308
+ end
302
309
  end
303
310
  end
304
311
 
@@ -1,9 +1,11 @@
1
1
  require 'rbbt/ner/segment'
2
+ require 'rbbt/entity'
2
3
 
3
4
  module NamedEntity
4
- extend Annotation
5
+ extend Entity
5
6
  include Segment
6
- self.annotation :type, :code, :score
7
+
8
+ self.annotation :type, :code, :score, :docid
7
9
 
8
10
  def report
9
11
  <<-EOF
@@ -2,9 +2,34 @@ require 'rbbt/annotations'
2
2
  require 'rbbt/ner/segment'
3
3
 
4
4
  module Token
5
- extend Annotation
6
- include Segment
7
- self.annotation :original
5
+ attr_accessor :offset, :original
6
+
7
+ def self.all_annotations
8
+ [:offset, :original]
9
+ end
10
+
11
+ def self.setup(text, start, original = nil)
12
+ text.extend Token
13
+ text.offset = start
14
+ text.original = original
15
+ text
16
+ end
17
+
18
+ def info
19
+ {:original => original, :offset => offset}
20
+ end
21
+
22
+ def id
23
+ Misc.hash2md5 info.merge :self => self
24
+ end
25
+
26
+ def end
27
+ offset + self.length - 1
28
+ end
29
+
30
+ def range
31
+ (offset..self.end)
32
+ end
8
33
 
9
34
  def self.tokenize(text, split_at = /\s|(\(|\)|[-."':,])/, start = 0)
10
35
 
@@ -1,9 +1,9 @@
1
+ require 'rbbt/util/misc'
1
2
  require 'rbbt/ner/segment'
3
+
2
4
  module Transformed
3
- attr_accessor :transformation_offset_differences, :transformation_original
4
5
 
5
6
  def self.transform(text, segments, replacement = nil, &block)
6
- require 'rbbt/util/misc'
7
7
 
8
8
  text.extend Transformed
9
9
  text.replace(segments, replacement, &block)
@@ -12,7 +12,6 @@ module Transformed
12
12
  end
13
13
 
14
14
  def self.with_transform(text, segments, replacement)
15
- require 'rbbt/util/misc'
16
15
 
17
16
  text.extend Transformed
18
17
  text.replace(segments, replacement)
@@ -24,147 +23,149 @@ module Transformed
24
23
  text.restore(segments, true)
25
24
  end
26
25
 
27
- def transform_pos(pos)
28
- return pos if transformation_offset_differences.nil?
29
- # tranformation_offset_differences are assumed to be sorted in reverse
30
- # order
31
- transformation_offset_differences.reverse.each do |trans_diff|
32
- acc = 0
33
- trans_diff.reverse.each do |offset, diff, orig_length, trans_length|
34
- break if offset >= pos
35
- acc += diff
26
+ attr_accessor :transformed_segments, :transformation_stack
27
+
28
+ def shift(segment_o)
29
+ begin_shift = 0
30
+ end_shift = 0
31
+
32
+ @transformed_segments.sort_by{|id, info| info.last}.each{|id,info|
33
+ pseg_o, diff = info
34
+
35
+ case
36
+ # Before
37
+ when segment_o.last + end_shift < pseg_o.begin
38
+ # After
39
+ when (segment_o.begin + begin_shift > pseg_o.last)
40
+ begin_shift += diff
41
+ end_shift += diff
42
+ # Includes
43
+ when (segment_o.begin + begin_shift <= pseg_o.begin and segment_o.last + end_shift >= pseg_o.last)
44
+ end_shift += diff
45
+ # Inside
46
+ when (segment_o.begin + begin_shift >= pseg_o.begin and segment_o.last + end_shift <= pseg_o.last)
47
+ return nil
48
+ # Overlaps start
49
+ when (segment_o.begin + begin_shift <= pseg_o.begin and segment_o.last + end_shift <= pseg_o.last)
50
+ return nil
51
+ # Overlaps end
52
+ when (segment_o.begin + begin_shift >= pseg_o.begin and segment_o.last + end_shift >= pseg_o.last)
53
+ return nil
54
+ else
55
+ raise "Unknown overlaps: #{segment_o.inspect} - #{pseg_o.inspect}"
36
56
  end
37
- pos = pos - acc
38
- end
57
+ }
39
58
 
40
- pos
59
+ [begin_shift, end_shift]
41
60
  end
42
61
 
43
- def transform_range(range)
44
- (transform_pos(range.begin)..transform_pos(range.end))
62
+ def self.sort(segments)
63
+ segments.compact.sort do |a,b|
64
+ case
65
+ when ((a.nil? and b.nil?) or (a.offset.nil? and b.offset.nil?))
66
+ 0
67
+ when (a.nil? or a.offset.nil?)
68
+ -1
69
+ when (b.nil? or b.offset.nil?)
70
+ +1
71
+ # Non-overlap
72
+ when (a.end < b.offset or b.end < a.offset)
73
+ b.offset <=> a.offset
74
+ # b includes a
75
+ when (a.offset >= b.offset and a.end <= b.end)
76
+ -1
77
+ # a includes b
78
+ when (b.offset >= a.offset and b.end <= a.end)
79
+ +1
80
+ # Overlap
81
+ when (a.offset > b.offset and a.end > b.end or b.offset < a.offset and b.end > a.end)
82
+ a.length <=> b.length
83
+ else
84
+ raise "Unexpected case in sort: #{a.range} - #{b.range}"
85
+ end
86
+ end
45
87
  end
46
88
 
47
- def transformed_set(pos, value)
48
- transformed_pos = case
49
- when Range === pos
50
- transform_range(pos)
51
- when Integer === pos
52
- transform_pos(pos)
53
- else
54
- raise "Text position not understood '#{pos.inspect}'. Not Range or Integer"
55
- end
56
-
57
- self[transformed_pos] = value
58
- end
89
+ def replace(segments, replacement = nil, &block)
90
+ @transformed_segments ||= {}
91
+ @transformation_stack ||= []
92
+ stack = []
59
93
 
60
- def transformed_get(pos)
61
- transformed_pos = case
62
- when Range === pos
63
- transform_range(pos)
64
- when Integer === pos
65
- transform_pos(pos)
66
- else
67
- raise "Text position not understood '#{pos.inspect}'. Not Range or Integer"
68
- end
69
-
70
- self[transformed_pos]
71
- end
94
+ Transformed.sort(segments).each do |segment|
95
+ next if segment.offset.nil?
96
+ shift = shift segment.range
72
97
 
73
- def conflict?(segment_range)
74
- return false if @transformation_offset_differences.nil? or @transformation_offset_differences.empty?
75
- transformation_offset_difference = @transformation_offset_differences.last
98
+ next if shift.nil?
76
99
 
77
- transformation_offset_difference.each do |info|
78
- offset, diff, orig_length, trans_length = info
79
- return true if segment_range.begin > offset and segment_range.begin < offset + trans_length or
80
- segment_range.end > offset and segment_range.end < offset + trans_length
81
- end
100
+ shift_begin, shift_end = shift
82
101
 
83
- return false
84
- end
102
+ text_offset = self.respond_to?(:offset)? self.offset : 0
103
+ updated_begin = segment.offset + shift_begin - text_offset
104
+ updated_end = segment.range.last + shift_end - text_offset
85
105
 
86
- def replace(segments, replacement = nil, &block)
87
- replacement ||= block
88
- raise "No replacement given" if replacement.nil?
89
- transformation_offset_differences = []
90
- transformation_original = []
106
+ updated_range = (updated_begin..updated_end)
91
107
 
92
- Segment.clean_sort(segments).reverse.each do |segment|
93
- untransformed_segment_range_here= segment.range_in(self)
94
- transformed_segment_range = self.transform_range(untransformed_segment_range_here)
95
- next if conflict?(transformed_segment_range)
108
+ updated_text = self[updated_begin..updated_end]
96
109
 
97
- text_before_transform = self[transformed_segment_range]
110
+ original_text = segment.dup
111
+ segment.replace updated_text
98
112
 
99
113
  case
114
+ when block_given?
115
+ new = block.call(segment)
100
116
  when String === replacement
101
- transformed_text = replacement
117
+ new = replacement
102
118
  when Proc === replacement
119
+ new = replacement.call(segment)
120
+ end
103
121
 
104
- # Prepare segment with new text
105
- save_segment_text = segment.dup
106
- save_offset = segment.offset
107
- segment.replace text_before_transform
108
- segment.offset = transformed_segment_range.begin
122
+ diff = new.length - segment.length
109
123
 
110
- transformed_text = replacement.call segment
124
+ self[updated_begin..updated_end] = new
111
125
 
112
- # Restore segment with original text
113
- segment.replace save_segment_text
114
- segment.offset = save_offset
115
- else
116
- raise "Replacemente not String nor Proc"
117
- end
118
- diff = segment.length - transformed_text.length
119
- self[transformed_segment_range] = transformed_text
126
+ @transformed_segments[segment.object_id] = [segment.range, diff, updated_text, updated_range, @transformed_segments.size]
120
127
 
121
- transformation_offset_differences << [untransformed_segment_range_here.begin, diff, text_before_transform.length, transformed_text.length]
122
- transformation_original << text_before_transform
128
+ segment.replace original_text
129
+ stack << segment.object_id
123
130
  end
131
+ @transformation_stack << stack
132
+ end
124
133
 
125
- @transformation_offset_differences ||= []
126
- @transformation_offset_differences << transformation_offset_differences
127
- @transformation_original ||= []
128
- @transformation_original << transformation_original
134
+ def fix_segment(segment, range, diff)
135
+ case
136
+ # Before
137
+ when segment.end < range.begin
138
+ # After
139
+ when segment.offset > range.end + diff
140
+ segment.offset -= diff
141
+ # Includes
142
+ when (segment.offset <= range.begin and segment.end >= range.end + diff)
143
+ segment.replace self[segment.offset..segment.end - diff]
144
+ else
145
+ raise "Segment Overlaps"
146
+ end
129
147
  end
130
148
 
131
- def restore(segments = nil, first_only = false)
132
- stop = false
133
- while self.transformation_offset_differences.any? and not stop
134
- transformation_offset_differences = self.transformation_offset_differences.pop
135
- transformation_original = self.transformation_original.pop
149
+ def restore(segments, first_only = false)
150
+ return segments if @transformation_stack.empty?
136
151
 
137
- ranges = transformation_offset_differences.collect do |offset,diff,orig_length,rep_length|
138
- (offset..(offset + rep_length - 1))
139
- end
152
+ if first_only
153
+ @transformation_stack.pop.reverse.each do |id|
154
+ orig_range, diff, text, range = @transformed_segments.delete id
140
155
 
141
- ranges.zip(transformation_original).reverse.each do |range,text|
142
- self.transformed_set(range, text)
156
+ new_range = (range.begin..range.last + diff)
157
+ self[new_range] = text
158
+ segments.each do |segment|
159
+ next unless Segment === segment
160
+ fix_segment(segment, range, diff)
161
+ end if Array === segments
143
162
  end
144
-
145
- stop = true if first_only
146
-
147
- next if segments.nil?
148
-
149
- segment_ranges = segments.each do |segment|
150
- r = segment.range
151
-
152
- s = r.begin
153
- e = r.end
154
- sdiff = 0
155
- ediff = 0
156
- transformation_offset_differences.reverse.each do |offset,diff,orig_length,rep_length|
157
- sdiff += diff if offset < s
158
- ediff += diff if offset + rep_length - 1 < e
159
- end
160
-
161
- segment.offset = s + sdiff
162
- segment.replace self[(s+sdiff)..(e + ediff)]
163
+ segments
164
+ else
165
+ while @transformation_stack.any?
166
+ restore(segments, true)
163
167
  end
168
+ segments
164
169
  end
165
-
166
- segments
167
170
  end
168
171
  end
169
-
170
-
@@ -110,7 +110,7 @@ class TokenTrieNER < NER
110
110
  end
111
111
 
112
112
  def self.merge(index1, index2)
113
- index1.write if index1.respond_to? :write
113
+ index1.write if index1.respond_to? :write and not index1.write?
114
114
  index2.each do |key, new_index2|
115
115
  case
116
116
  when key == :END
@@ -119,7 +119,8 @@ class TokenTrieNER < NER
119
119
  end1.uniq!
120
120
  index1[:END] = end1
121
121
  when index1.include?(key)
122
- index1[key] = merge(index1[key], new_index2)
122
+ new = merge(index1[key], new_index2)
123
+ index1[key] = new
123
124
  else
124
125
  index1[key] = new_index2
125
126
  end
@@ -148,7 +149,10 @@ class TokenTrieNER < NER
148
149
  tokens = Array === name ? name : tokenize(name, false, split_at, no_clean)
149
150
  tokens.extend EnumeratedArray
150
151
 
151
- tmp_index = merge(tmp_index, index_for_tokens(tokens, code, type, slack)) unless tokens.empty?
152
+ token_index = index_for_tokens(tokens, code, type, slack)
153
+
154
+ tmp_index = merge(tmp_index, token_index) unless tokens.empty?
155
+
152
156
  items_in_chunk += 1
153
157
 
154
158
  if items_in_chunk > chunk_size
@@ -267,22 +271,22 @@ class TokenTrieNER < NER
267
271
  TokenTrieNER.merge(@index, new.index)
268
272
  when TSV === new
269
273
  Log.debug "TokenTrieNER merging TSV"
270
- old_unnamed = new.unnamed
271
- old_monitor = new.monitor
272
- new.unnamed = true
273
- new.monitor = {:step => 1000, :desc => "Processing TSV into TokenTrieNER"}
274
- TokenTrieNER.process(@index, new, type, slack, split_at, no_clean)
275
- new.unnamed = old_unnamed
276
- new.monitor = old_monitor
274
+ new.with_unnamed do
275
+ new.with_monitor({:step => 1000, :desc => "Processing TSV into TokenTrieNER"}) do
276
+ TokenTrieNER.process(@index, new, type, slack, split_at, no_clean)
277
+ end
278
+ end
277
279
  when Hash === new
278
280
  Log.debug "TokenTrieNER merging Hash"
279
281
  TokenTrieNER.merge(@index, new)
280
282
  when String === new
281
283
  Log.debug "TokenTrieNER merging file: #{ new }"
282
284
  new = TSV.open(new, :flat)
283
- new.unnamed = true
284
- new.monitor = {:step => 1000, :desc => "Processing TSV into TokenTrieNER"}
285
- TokenTrieNER.process(@index, new, type, slack, split_at, no_clean)
285
+ new.with_unnamed do
286
+ new.with_monitor({:step => 1000, :desc => "Processing TSV into TokenTrieNER"}) do
287
+ TokenTrieNER.process(@index, new, type, slack, split_at, no_clean)
288
+ end
289
+ end
286
290
  end
287
291
  end
288
292
 
@@ -2,7 +2,19 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.r
2
2
  require 'rbbt/corpus/document'
3
3
  require 'test/unit'
4
4
 
5
+ module TokenEntity
6
+ extend Annotation
7
+ include Segment
8
+ self.annotation :original
9
+ end
5
10
  class Document
11
+
12
+ def tokenize(text)
13
+ Token.tokenize(text).collect do |token|
14
+ TokenEntity.setup(token.dup, token.offset, token.original)
15
+ end
16
+ end
17
+
6
18
  define :sentences do
7
19
  require 'rbbt/nlp/nlp'
8
20
  NLP.geniass_sentence_splitter(text)
@@ -10,22 +22,22 @@ class Document
10
22
 
11
23
  define :tokens do
12
24
  require 'rbbt/ner/segment/token'
13
- Token.tokenize(text)
25
+ tokenize(text)
14
26
  end
15
27
 
16
28
  define :long_words do
17
29
  require 'rbbt/ner/segment/token'
18
- Token.tokenize(text).select{|tok| tok.length > 5}
30
+ tokenize(text).select{|tok| tok.length > 5}
19
31
  end
20
32
 
21
33
  define :short_words do
22
34
  require 'rbbt/ner/segment/token'
23
- Token.tokenize(text).select{|tok| tok.length < 5}
35
+ tokenize(text).select{|tok| tok.length < 5}
24
36
  end
25
37
 
26
38
  define :even_words do
27
39
  require 'rbbt/ner/segment/token'
28
- Token.tokenize(text).select{|tok| tok.length % 2 == 0}
40
+ tokenize(text).select{|tok| tok.length % 2 == 0}
29
41
  end
30
42
 
31
43
  define :missing do
@@ -110,7 +122,7 @@ another sentence.
110
122
  doc = Document.new(dir)
111
123
  doc.text = text
112
124
 
113
- sentence = doc.sentences.last
125
+ sentence = doc.sentences.sort_by{|sentence| sentence.offset}.last
114
126
  doc.load_into sentence, :tokens
115
127
 
116
128
  assert_equal 5, sentence.tokens.length
@@ -134,7 +146,7 @@ another sentence.
134
146
  doc = Document.new(dir)
135
147
  doc.text = text
136
148
 
137
- sentence = doc.sentences.last
149
+ sentence = doc.sentences.sort_by{|sentence| sentence.offset}.last
138
150
  Misc.benchmark(1) do
139
151
  doc = Document.new(dir)
140
152
  doc.text = text
@@ -166,7 +178,7 @@ another sentence.
166
178
  doc = Document.new(dir)
167
179
  doc.text = text * 10
168
180
 
169
- sentence = doc.sentences.last
181
+ sentence = doc.sentences.sort_by{|sentence| sentence.offset}.last
170
182
 
171
183
  doc.load_into sentence, :tokens, :long_words
172
184
 
@@ -178,9 +190,9 @@ another sentence.
178
190
  doc = Document.new(dir)
179
191
  doc.text = text * 10
180
192
  doc.sentences
181
- assert_equal sentence, doc.sentences.last
193
+ assert_equal sentence, doc.sentences.sort_by{|sentence| sentence.offset}.last
182
194
 
183
- sentence = doc.sentences.last
195
+ sentence = doc.sentences.sort_by{|sentence| sentence.offset}.last
184
196
  doc.load_into sentence, :tokens, :long_words
185
197
 
186
198
  assert_equal 2, sentence.long_words.length
@@ -211,7 +223,7 @@ another sentence.
211
223
  doc.text = text * 10
212
224
  doc.docid = "TEST"
213
225
 
214
- sentence = doc.sentences.last
226
+ sentence = doc.sentences.sort_by{|sentence| sentence.offset}.last
215
227
 
216
228
  doc.load_into sentence, :tokens, :long_words, :short_words, :even_words
217
229
 
@@ -4,7 +4,7 @@ require 'rbbt/ner/segment/named_entity'
4
4
 
5
5
  class TestClass < Test::Unit::TestCase
6
6
  def test_info
7
- a = "test"
7
+ a = ["test"]
8
8
  NamedEntity.setup a
9
9
  assert(! a.info.keys.include?(:code))
10
10
  a.code = 10
@@ -1,9 +1,11 @@
1
1
  require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
2
  require 'rbbt/ner/segment/transformed'
3
3
  require 'rbbt/ner/segment/named_entity'
4
+ require 'rexml/document'
5
+ require 'rand'
4
6
 
5
7
  class TestClass < Test::Unit::TestCase
6
- def test_transform
8
+ def tttest_transform
7
9
  a = "This sentence mentions the TP53 gene and the CDK5 protein"
8
10
  original = a.dup
9
11
 
@@ -56,11 +58,13 @@ class TestClass < Test::Unit::TestCase
56
58
  Transformed.with_transform(a, [gene1], "GN") do
57
59
  assert_equal original.sub("TP53", 'GN'), a
58
60
  end
61
+
59
62
  assert_equal original, a
60
63
 
61
- Transformed.with_transform(a, [gene1,gene2], "GN") do
64
+ Transformed.with_transform(a, [gene1, gene2], "GN") do
62
65
  assert_equal original.gsub(/TP53|CDK5R1/, 'GN'), a
63
66
  end
67
+
64
68
  assert_equal original, a
65
69
 
66
70
  Transformed.with_transform(a, [gene1], "GN") do
@@ -69,6 +73,7 @@ class TestClass < Test::Unit::TestCase
69
73
  end
70
74
  assert_equal original.gsub(/TP53/, 'GN'), a
71
75
  end
76
+
72
77
  assert_equal original, a
73
78
 
74
79
  exp1, exp2 = nil, nil
@@ -169,7 +174,37 @@ class TestClass < Test::Unit::TestCase
169
174
  assert_equal one, a
170
175
  end
171
176
  end
172
-
173
177
  end
178
+
179
+ def test_error
180
+ a = "Do not have a diagnosis of another hereditary APC resistance/Factor V Leiden, Protein S or C deficiency, prothrombin gene mutation (G20210A), or acquired (lupus anticoagulant) thrombophilic disorder"
181
+
182
+ entity1 = "gene"
183
+ entity1.extend NamedEntity
184
+ entity1.offset = a.index entity1
185
+ entity1.type = "Gene"
186
+
187
+ entity2 = "prothrombin gene mutation"
188
+ entity2.extend NamedEntity
189
+ entity2.offset = a.index entity2
190
+ entity2.type = "Mutation"
191
+
192
+ entity3 = "Protein S or C"
193
+ entity3.extend NamedEntity
194
+ entity3.offset = a.index entity3
195
+ entity3.type = "Gene"
196
+
197
+ entity4 = "prothrombin gene mutation"
198
+ entity4.extend NamedEntity
199
+ entity4.offset = a.index entity2
200
+ entity4.type = "Disease"
201
+
202
+
203
+ Transformed.with_transform(a, [entity1].sort_by{rand}, Proc.new{|e| e.html}) do
204
+ Transformed.with_transform(a, [entity3, entity2, entity4].sort_by{rand}, Proc.new{|e| e.html}) do
205
+ assert_nothing_raised{REXML::Document.new "<xml>"+ a + "</xml>"}
206
+ end
207
+ end
208
+ end
174
209
  end
175
210
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- hash: 3
4
+ hash: 1
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 6
9
- - 2
10
- version: 0.6.2
9
+ - 3
10
+ version: 0.6.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,8 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-10-03 00:00:00 +02:00
19
- default_executable: get_ppis.rb
18
+ date: 2012-02-09 00:00:00 Z
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
21
  name: rbbt-util
@@ -106,6 +105,7 @@ files:
106
105
  - lib/rbbt/corpus/document.rb
107
106
  - lib/rbbt/corpus/document_repo.rb
108
107
  - lib/rbbt/corpus/sources/pubmed.rb
108
+ - lib/rbbt/entity/document.rb
109
109
  - lib/rbbt/ner/NER.rb
110
110
  - lib/rbbt/ner/abner.rb
111
111
  - lib/rbbt/ner/banner.rb
@@ -161,7 +161,6 @@ files:
161
161
  - test/rbbt/corpus/test_corpus.rb
162
162
  - test/rbbt/corpus/test_document.rb
163
163
  - bin/get_ppis.rb
164
- has_rdoc: true
165
164
  homepage: http://github.com/mikisvaz/rbbt-util
166
165
  licenses: []
167
166
 
@@ -191,7 +190,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
191
190
  requirements: []
192
191
 
193
192
  rubyforge_project:
194
- rubygems_version: 1.6.2
193
+ rubygems_version: 1.8.10
195
194
  signing_key:
196
195
  specification_version: 3
197
196
  summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)