rbbt-text 0.6.2 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -90,6 +90,7 @@ class Dictionary::TF_IDF
90
90
  @terms[term].to_f / num_docs * Math::log(1.0/df_value)
91
91
  ]
92
92
  }
93
+
93
94
  if limit
94
95
  Hash[*best.sort{|a,b| b[1] <=> a[1]}.slice(0, limit).flatten]
95
96
  else
@@ -148,7 +148,9 @@ class Document
148
148
  fields = data.fields if fields.nil? and data.respond_to? :fields
149
149
 
150
150
 
151
- data.filter
151
+ if data.respond_to? :persistence_path and String === data.persistence_path
152
+ data.filter(data.persistence_path + '.filters')
153
+ end
152
154
  data.add_filter("field:#{ doc_field }", @docid)
153
155
  data.add_filter("field:#{ entity_field }", "#{ entity }")
154
156
  keys = data.keys
@@ -157,7 +159,7 @@ class Document
157
159
 
158
160
  if keys.empty?
159
161
  segments = produce_#{entity}
160
- segments << Segment.setup("No #{entity} found in document #{ @docid }", -1) if segments.empty?
162
+ segments << Segment.setup("No #{entity} found in document " + @docid.to_s, -1) if segments.empty?
161
163
  tsv = Segment.tsv(segments, *fields.reject{|f| ["#{doc_field}", "#{entity_field}", "Start", "End", "annotation_types"].include? f})
162
164
 
163
165
  tsv.add_field "#{ doc_field }" do
@@ -178,6 +180,7 @@ class Document
178
180
  data.pop_filter
179
181
  data.pop_filter
180
182
  data.read
183
+
181
184
  else
182
185
  if raw == :check
183
186
  data.close
@@ -99,7 +99,8 @@ module DocumentRepo
99
99
  end
100
100
 
101
101
  def add(text, namespace, id, type, hash)
102
- write
102
+ read
103
+ write unless write?
103
104
  docid = fields2docid(namespace, id, type, hash)
104
105
  self[docid] = text unless self.include? docid
105
106
  read
@@ -0,0 +1,40 @@
1
+ require 'rbbt/entity'
2
+
3
+ module Document
4
+ extend Entity
5
+
6
+ class << self
7
+ attr_accessor :corpus
8
+ end
9
+
10
+ property :text => :array2single do
11
+ article_text = {}
12
+ missing = []
13
+
14
+ self.each do |doc|
15
+ Document.corpus.read if Document.corpus.respond_to? :read
16
+ if Document.corpus.include?(doc)
17
+ article_text[doc] = Document.corpus[doc]
18
+ else
19
+ missing << doc
20
+ end
21
+ end
22
+
23
+ if missing.any?
24
+ missing.first.annotate missing
25
+ missing_text = Misc.process_to_hash(missing){|list| list._get_text}
26
+
27
+ Misc.lock Document.corpus.persistence_path do
28
+ Document.corpus.write if Document.corpus.respond_to? :write
29
+ missing_text.each do |doc, text|
30
+ article_text[doc] = text
31
+ Document.corpus[doc] = text
32
+ end
33
+ Document.corpus.read if Document.corpus.respond_to? :read
34
+ end
35
+ end
36
+
37
+ article_text.values_at *self
38
+ end
39
+
40
+ end
@@ -5,6 +5,10 @@ module Segment
5
5
  extend Annotation
6
6
  self.annotation :offset
7
7
 
8
+ def offset=(offset)
9
+ @offset = offset.nil? ? nil : offset.to_i
10
+ end
11
+
8
12
  #{{{ Ranges
9
13
 
10
14
  def end
@@ -297,8 +301,11 @@ module Segment
297
301
  end
298
302
 
299
303
  def self.load_tsv(tsv)
300
- tsv.collect do |id, values|
301
- Annotated.load_tsv_values(id, values, tsv.fields)
304
+ fields = tsv.fields
305
+ tsv.with_unnamed do
306
+ tsv.collect do |id, values|
307
+ Annotated.load_tsv_values(id, values, fields)
308
+ end
302
309
  end
303
310
  end
304
311
 
@@ -1,9 +1,11 @@
1
1
  require 'rbbt/ner/segment'
2
+ require 'rbbt/entity'
2
3
 
3
4
  module NamedEntity
4
- extend Annotation
5
+ extend Entity
5
6
  include Segment
6
- self.annotation :type, :code, :score
7
+
8
+ self.annotation :type, :code, :score, :docid
7
9
 
8
10
  def report
9
11
  <<-EOF
@@ -2,9 +2,34 @@ require 'rbbt/annotations'
2
2
  require 'rbbt/ner/segment'
3
3
 
4
4
  module Token
5
- extend Annotation
6
- include Segment
7
- self.annotation :original
5
+ attr_accessor :offset, :original
6
+
7
+ def self.all_annotations
8
+ [:offset, :original]
9
+ end
10
+
11
+ def self.setup(text, start, original = nil)
12
+ text.extend Token
13
+ text.offset = start
14
+ text.original = original
15
+ text
16
+ end
17
+
18
+ def info
19
+ {:original => original, :offset => offset}
20
+ end
21
+
22
+ def id
23
+ Misc.hash2md5 info.merge :self => self
24
+ end
25
+
26
+ def end
27
+ offset + self.length - 1
28
+ end
29
+
30
+ def range
31
+ (offset..self.end)
32
+ end
8
33
 
9
34
  def self.tokenize(text, split_at = /\s|(\(|\)|[-."':,])/, start = 0)
10
35
 
@@ -1,9 +1,9 @@
1
+ require 'rbbt/util/misc'
1
2
  require 'rbbt/ner/segment'
3
+
2
4
  module Transformed
3
- attr_accessor :transformation_offset_differences, :transformation_original
4
5
 
5
6
  def self.transform(text, segments, replacement = nil, &block)
6
- require 'rbbt/util/misc'
7
7
 
8
8
  text.extend Transformed
9
9
  text.replace(segments, replacement, &block)
@@ -12,7 +12,6 @@ module Transformed
12
12
  end
13
13
 
14
14
  def self.with_transform(text, segments, replacement)
15
- require 'rbbt/util/misc'
16
15
 
17
16
  text.extend Transformed
18
17
  text.replace(segments, replacement)
@@ -24,147 +23,149 @@ module Transformed
24
23
  text.restore(segments, true)
25
24
  end
26
25
 
27
- def transform_pos(pos)
28
- return pos if transformation_offset_differences.nil?
29
- # tranformation_offset_differences are assumed to be sorted in reverse
30
- # order
31
- transformation_offset_differences.reverse.each do |trans_diff|
32
- acc = 0
33
- trans_diff.reverse.each do |offset, diff, orig_length, trans_length|
34
- break if offset >= pos
35
- acc += diff
26
+ attr_accessor :transformed_segments, :transformation_stack
27
+
28
+ def shift(segment_o)
29
+ begin_shift = 0
30
+ end_shift = 0
31
+
32
+ @transformed_segments.sort_by{|id, info| info.last}.each{|id,info|
33
+ pseg_o, diff = info
34
+
35
+ case
36
+ # Before
37
+ when segment_o.last + end_shift < pseg_o.begin
38
+ # After
39
+ when (segment_o.begin + begin_shift > pseg_o.last)
40
+ begin_shift += diff
41
+ end_shift += diff
42
+ # Includes
43
+ when (segment_o.begin + begin_shift <= pseg_o.begin and segment_o.last + end_shift >= pseg_o.last)
44
+ end_shift += diff
45
+ # Inside
46
+ when (segment_o.begin + begin_shift >= pseg_o.begin and segment_o.last + end_shift <= pseg_o.last)
47
+ return nil
48
+ # Overlaps start
49
+ when (segment_o.begin + begin_shift <= pseg_o.begin and segment_o.last + end_shift <= pseg_o.last)
50
+ return nil
51
+ # Overlaps end
52
+ when (segment_o.begin + begin_shift >= pseg_o.begin and segment_o.last + end_shift >= pseg_o.last)
53
+ return nil
54
+ else
55
+ raise "Unknown overlaps: #{segment_o.inspect} - #{pseg_o.inspect}"
36
56
  end
37
- pos = pos - acc
38
- end
57
+ }
39
58
 
40
- pos
59
+ [begin_shift, end_shift]
41
60
  end
42
61
 
43
- def transform_range(range)
44
- (transform_pos(range.begin)..transform_pos(range.end))
62
+ def self.sort(segments)
63
+ segments.compact.sort do |a,b|
64
+ case
65
+ when ((a.nil? and b.nil?) or (a.offset.nil? and b.offset.nil?))
66
+ 0
67
+ when (a.nil? or a.offset.nil?)
68
+ -1
69
+ when (b.nil? or b.offset.nil?)
70
+ +1
71
+ # Non-overlap
72
+ when (a.end < b.offset or b.end < a.offset)
73
+ b.offset <=> a.offset
74
+ # b includes a
75
+ when (a.offset >= b.offset and a.end <= b.end)
76
+ -1
77
+ # b includes a
78
+ when (b.offset >= a.offset and b.end <= a.end)
79
+ +1
80
+ # Overlap
81
+ when (a.offset > b.offset and a.end > b.end or b.offset < a.offset and b.end > a.end)
82
+ a.length <=> b.length
83
+ else
84
+ raise "Unexpected case in sort: #{a.range} - #{b.range}"
85
+ end
86
+ end
45
87
  end
46
88
 
47
- def transformed_set(pos, value)
48
- transformed_pos = case
49
- when Range === pos
50
- transform_range(pos)
51
- when Integer === pos
52
- transform_pos(pos)
53
- else
54
- raise "Text position not understood '#{pos.inspect}'. Not Range or Integer"
55
- end
56
-
57
- self[transformed_pos] = value
58
- end
89
+ def replace(segments, replacement = nil, &block)
90
+ @transformed_segments ||= {}
91
+ @transformation_stack ||= []
92
+ stack = []
59
93
 
60
- def transformed_get(pos)
61
- transformed_pos = case
62
- when Range === pos
63
- transform_range(pos)
64
- when Integer === pos
65
- transform_pos(pos)
66
- else
67
- raise "Text position not understood '#{pos.inspect}'. Not Range or Integer"
68
- end
69
-
70
- self[transformed_pos]
71
- end
94
+ Transformed.sort(segments).each do |segment|
95
+ next if segment.offset.nil?
96
+ shift = shift segment.range
72
97
 
73
- def conflict?(segment_range)
74
- return false if @transformation_offset_differences.nil? or @transformation_offset_differences.empty?
75
- transformation_offset_difference = @transformation_offset_differences.last
98
+ next if shift.nil?
76
99
 
77
- transformation_offset_difference.each do |info|
78
- offset, diff, orig_length, trans_length = info
79
- return true if segment_range.begin > offset and segment_range.begin < offset + trans_length or
80
- segment_range.end > offset and segment_range.end < offset + trans_length
81
- end
100
+ shift_begin, shift_end = shift
82
101
 
83
- return false
84
- end
102
+ text_offset = self.respond_to?(:offset)? self.offset : 0
103
+ updated_begin = segment.offset + shift_begin - text_offset
104
+ updated_end = segment.range.last + shift_end - text_offset
85
105
 
86
- def replace(segments, replacement = nil, &block)
87
- replacement ||= block
88
- raise "No replacement given" if replacement.nil?
89
- transformation_offset_differences = []
90
- transformation_original = []
106
+ updated_range = (updated_begin..updated_end)
91
107
 
92
- Segment.clean_sort(segments).reverse.each do |segment|
93
- untransformed_segment_range_here= segment.range_in(self)
94
- transformed_segment_range = self.transform_range(untransformed_segment_range_here)
95
- next if conflict?(transformed_segment_range)
108
+ updated_text = self[updated_begin..updated_end]
96
109
 
97
- text_before_transform = self[transformed_segment_range]
110
+ original_text = segment.dup
111
+ segment.replace updated_text
98
112
 
99
113
  case
114
+ when block_given?
115
+ new = block.call(segment)
100
116
  when String === replacement
101
- transformed_text = replacement
117
+ new = replacement
102
118
  when Proc === replacement
119
+ new = replacement.call(segment)
120
+ end
103
121
 
104
- # Prepare segment with new text
105
- save_segment_text = segment.dup
106
- save_offset = segment.offset
107
- segment.replace text_before_transform
108
- segment.offset = transformed_segment_range.begin
122
+ diff = new.length - segment.length
109
123
 
110
- transformed_text = replacement.call segment
124
+ self[updated_begin..updated_end] = new
111
125
 
112
- # Restore segment with original text
113
- segment.replace save_segment_text
114
- segment.offset = save_offset
115
- else
116
- raise "Replacemente not String nor Proc"
117
- end
118
- diff = segment.length - transformed_text.length
119
- self[transformed_segment_range] = transformed_text
126
+ @transformed_segments[segment.object_id] = [segment.range, diff, updated_text, updated_range, @transformed_segments.size]
120
127
 
121
- transformation_offset_differences << [untransformed_segment_range_here.begin, diff, text_before_transform.length, transformed_text.length]
122
- transformation_original << text_before_transform
128
+ segment.replace original_text
129
+ stack << segment.object_id
123
130
  end
131
+ @transformation_stack << stack
132
+ end
124
133
 
125
- @transformation_offset_differences ||= []
126
- @transformation_offset_differences << transformation_offset_differences
127
- @transformation_original ||= []
128
- @transformation_original << transformation_original
134
+ def fix_segment(segment, range, diff)
135
+ case
136
+ # Before
137
+ when segment.end < range.begin
138
+ # After
139
+ when segment.offset > range.end + diff
140
+ segment.offset -= diff
141
+ # Includes
142
+ when (segment.offset <= range.begin and segment.end >= range.end + diff)
143
+ segment.replace self[segment.offset..segment.end - diff]
144
+ else
145
+ raise "Segment Overlaps"
146
+ end
129
147
  end
130
148
 
131
- def restore(segments = nil, first_only = false)
132
- stop = false
133
- while self.transformation_offset_differences.any? and not stop
134
- transformation_offset_differences = self.transformation_offset_differences.pop
135
- transformation_original = self.transformation_original.pop
149
+ def restore(segments, first_only = false)
150
+ return segments if @transformation_stack.empty?
136
151
 
137
- ranges = transformation_offset_differences.collect do |offset,diff,orig_length,rep_length|
138
- (offset..(offset + rep_length - 1))
139
- end
152
+ if first_only
153
+ @transformation_stack.pop.reverse.each do |id|
154
+ orig_range, diff, text, range = @transformed_segments.delete id
140
155
 
141
- ranges.zip(transformation_original).reverse.each do |range,text|
142
- self.transformed_set(range, text)
156
+ new_range = (range.begin..range.last + diff)
157
+ self[new_range] = text
158
+ segments.each do |segment|
159
+ next unless Segment === segment
160
+ fix_segment(segment, range, diff)
161
+ end if Array === segments
143
162
  end
144
-
145
- stop = true if first_only
146
-
147
- next if segments.nil?
148
-
149
- segment_ranges = segments.each do |segment|
150
- r = segment.range
151
-
152
- s = r.begin
153
- e = r.end
154
- sdiff = 0
155
- ediff = 0
156
- transformation_offset_differences.reverse.each do |offset,diff,orig_length,rep_length|
157
- sdiff += diff if offset < s
158
- ediff += diff if offset + rep_length - 1 < e
159
- end
160
-
161
- segment.offset = s + sdiff
162
- segment.replace self[(s+sdiff)..(e + ediff)]
163
+ segments
164
+ else
165
+ while @transformation_stack.any?
166
+ restore(segments, true)
163
167
  end
168
+ segments
164
169
  end
165
-
166
- segments
167
170
  end
168
171
  end
169
-
170
-
@@ -110,7 +110,7 @@ class TokenTrieNER < NER
110
110
  end
111
111
 
112
112
  def self.merge(index1, index2)
113
- index1.write if index1.respond_to? :write
113
+ index1.write if index1.respond_to? :write and not index1.write?
114
114
  index2.each do |key, new_index2|
115
115
  case
116
116
  when key == :END
@@ -119,7 +119,8 @@ class TokenTrieNER < NER
119
119
  end1.uniq!
120
120
  index1[:END] = end1
121
121
  when index1.include?(key)
122
- index1[key] = merge(index1[key], new_index2)
122
+ new = merge(index1[key], new_index2)
123
+ index1[key] = new
123
124
  else
124
125
  index1[key] = new_index2
125
126
  end
@@ -148,7 +149,10 @@ class TokenTrieNER < NER
148
149
  tokens = Array === name ? name : tokenize(name, false, split_at, no_clean)
149
150
  tokens.extend EnumeratedArray
150
151
 
151
- tmp_index = merge(tmp_index, index_for_tokens(tokens, code, type, slack)) unless tokens.empty?
152
+ token_index = index_for_tokens(tokens, code, type, slack)
153
+
154
+ tmp_index = merge(tmp_index, token_index) unless tokens.empty?
155
+
152
156
  items_in_chunk += 1
153
157
 
154
158
  if items_in_chunk > chunk_size
@@ -267,22 +271,22 @@ class TokenTrieNER < NER
267
271
  TokenTrieNER.merge(@index, new.index)
268
272
  when TSV === new
269
273
  Log.debug "TokenTrieNER merging TSV"
270
- old_unnamed = new.unnamed
271
- old_monitor = new.monitor
272
- new.unnamed = true
273
- new.monitor = {:step => 1000, :desc => "Processing TSV into TokenTrieNER"}
274
- TokenTrieNER.process(@index, new, type, slack, split_at, no_clean)
275
- new.unnamed = old_unnamed
276
- new.monitor = old_monitor
274
+ new.with_unnamed do
275
+ new.with_monitor({:step => 1000, :desc => "Processing TSV into TokenTrieNER"}) do
276
+ TokenTrieNER.process(@index, new, type, slack, split_at, no_clean)
277
+ end
278
+ end
277
279
  when Hash === new
278
280
  Log.debug "TokenTrieNER merging Hash"
279
281
  TokenTrieNER.merge(@index, new)
280
282
  when String === new
281
283
  Log.debug "TokenTrieNER merging file: #{ new }"
282
284
  new = TSV.open(new, :flat)
283
- new.unnamed = true
284
- new.monitor = {:step => 1000, :desc => "Processing TSV into TokenTrieNER"}
285
- TokenTrieNER.process(@index, new, type, slack, split_at, no_clean)
285
+ new.with_unnamed do
286
+ new.with_monitor({:step => 1000, :desc => "Processing TSV into TokenTrieNER"}) do
287
+ TokenTrieNER.process(@index, new, type, slack, split_at, no_clean)
288
+ end
289
+ end
286
290
  end
287
291
  end
288
292
 
@@ -2,7 +2,19 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '../../test_helper.r
2
2
  require 'rbbt/corpus/document'
3
3
  require 'test/unit'
4
4
 
5
+ module TokenEntity
6
+ extend Annotation
7
+ include Segment
8
+ self.annotation :original
9
+ end
5
10
  class Document
11
+
12
+ def tokenize(text)
13
+ Token.tokenize(text).collect do |token|
14
+ TokenEntity.setup(token.dup, token.offset, token.original)
15
+ end
16
+ end
17
+
6
18
  define :sentences do
7
19
  require 'rbbt/nlp/nlp'
8
20
  NLP.geniass_sentence_splitter(text)
@@ -10,22 +22,22 @@ class Document
10
22
 
11
23
  define :tokens do
12
24
  require 'rbbt/ner/segment/token'
13
- Token.tokenize(text)
25
+ tokenize(text)
14
26
  end
15
27
 
16
28
  define :long_words do
17
29
  require 'rbbt/ner/segment/token'
18
- Token.tokenize(text).select{|tok| tok.length > 5}
30
+ tokenize(text).select{|tok| tok.length > 5}
19
31
  end
20
32
 
21
33
  define :short_words do
22
34
  require 'rbbt/ner/segment/token'
23
- Token.tokenize(text).select{|tok| tok.length < 5}
35
+ tokenize(text).select{|tok| tok.length < 5}
24
36
  end
25
37
 
26
38
  define :even_words do
27
39
  require 'rbbt/ner/segment/token'
28
- Token.tokenize(text).select{|tok| tok.length % 2 == 0}
40
+ tokenize(text).select{|tok| tok.length % 2 == 0}
29
41
  end
30
42
 
31
43
  define :missing do
@@ -110,7 +122,7 @@ another sentence.
110
122
  doc = Document.new(dir)
111
123
  doc.text = text
112
124
 
113
- sentence = doc.sentences.last
125
+ sentence = doc.sentences.sort_by{|sentence| sentence.offset}.last
114
126
  doc.load_into sentence, :tokens
115
127
 
116
128
  assert_equal 5, sentence.tokens.length
@@ -134,7 +146,7 @@ another sentence.
134
146
  doc = Document.new(dir)
135
147
  doc.text = text
136
148
 
137
- sentence = doc.sentences.last
149
+ sentence = doc.sentences.sort_by{|sentence| sentence.offset}.last
138
150
  Misc.benchmark(1) do
139
151
  doc = Document.new(dir)
140
152
  doc.text = text
@@ -166,7 +178,7 @@ another sentence.
166
178
  doc = Document.new(dir)
167
179
  doc.text = text * 10
168
180
 
169
- sentence = doc.sentences.last
181
+ sentence = doc.sentences.sort_by{|sentence| sentence.offset}.last
170
182
 
171
183
  doc.load_into sentence, :tokens, :long_words
172
184
 
@@ -178,9 +190,9 @@ another sentence.
178
190
  doc = Document.new(dir)
179
191
  doc.text = text * 10
180
192
  doc.sentences
181
- assert_equal sentence, doc.sentences.last
193
+ assert_equal sentence, doc.sentences.sort_by{|sentence| sentence.offset}.last
182
194
 
183
- sentence = doc.sentences.last
195
+ sentence = doc.sentences.sort_by{|sentence| sentence.offset}.last
184
196
  doc.load_into sentence, :tokens, :long_words
185
197
 
186
198
  assert_equal 2, sentence.long_words.length
@@ -211,7 +223,7 @@ another sentence.
211
223
  doc.text = text * 10
212
224
  doc.docid = "TEST"
213
225
 
214
- sentence = doc.sentences.last
226
+ sentence = doc.sentences.sort_by{|sentence| sentence.offset}.last
215
227
 
216
228
  doc.load_into sentence, :tokens, :long_words, :short_words, :even_words
217
229
 
@@ -4,7 +4,7 @@ require 'rbbt/ner/segment/named_entity'
4
4
 
5
5
  class TestClass < Test::Unit::TestCase
6
6
  def test_info
7
- a = "test"
7
+ a = ["test"]
8
8
  NamedEntity.setup a
9
9
  assert(! a.info.keys.include?(:code))
10
10
  a.code = 10
@@ -1,9 +1,11 @@
1
1
  require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
2
  require 'rbbt/ner/segment/transformed'
3
3
  require 'rbbt/ner/segment/named_entity'
4
+ require 'rexml/document'
5
+ require 'rand'
4
6
 
5
7
  class TestClass < Test::Unit::TestCase
6
- def test_transform
8
+ def tttest_transform
7
9
  a = "This sentence mentions the TP53 gene and the CDK5 protein"
8
10
  original = a.dup
9
11
 
@@ -56,11 +58,13 @@ class TestClass < Test::Unit::TestCase
56
58
  Transformed.with_transform(a, [gene1], "GN") do
57
59
  assert_equal original.sub("TP53", 'GN'), a
58
60
  end
61
+
59
62
  assert_equal original, a
60
63
 
61
- Transformed.with_transform(a, [gene1,gene2], "GN") do
64
+ Transformed.with_transform(a, [gene1, gene2], "GN") do
62
65
  assert_equal original.gsub(/TP53|CDK5R1/, 'GN'), a
63
66
  end
67
+
64
68
  assert_equal original, a
65
69
 
66
70
  Transformed.with_transform(a, [gene1], "GN") do
@@ -69,6 +73,7 @@ class TestClass < Test::Unit::TestCase
69
73
  end
70
74
  assert_equal original.gsub(/TP53/, 'GN'), a
71
75
  end
76
+
72
77
  assert_equal original, a
73
78
 
74
79
  exp1, exp2 = nil, nil
@@ -169,7 +174,37 @@ class TestClass < Test::Unit::TestCase
169
174
  assert_equal one, a
170
175
  end
171
176
  end
172
-
173
177
  end
178
+
179
+ def test_error
180
+ a = "Do not have a diagnosis of another hereditary APC resistance/Factor V Leiden, Protein S or C deficiency, prothrombin gene mutation (G20210A), or acquired (lupus anticoagulant) thrombophilic disorder"
181
+
182
+ entity1 = "gene"
183
+ entity1.extend NamedEntity
184
+ entity1.offset = a.index entity1
185
+ entity1.type = "Gene"
186
+
187
+ entity2 = "prothrombin gene mutation"
188
+ entity2.extend NamedEntity
189
+ entity2.offset = a.index entity2
190
+ entity2.type = "Mutation"
191
+
192
+ entity3 = "Protein S or C"
193
+ entity3.extend NamedEntity
194
+ entity3.offset = a.index entity3
195
+ entity3.type = "Gene"
196
+
197
+ entity4 = "prothrombin gene mutation"
198
+ entity4.extend NamedEntity
199
+ entity4.offset = a.index entity2
200
+ entity4.type = "Disease"
201
+
202
+
203
+ Transformed.with_transform(a, [entity1].sort_by{rand}, Proc.new{|e| e.html}) do
204
+ Transformed.with_transform(a, [entity3, entity2, entity4].sort_by{rand}, Proc.new{|e| e.html}) do
205
+ assert_nothing_raised{REXML::Document.new "<xml>"+ a + "</xml>"}
206
+ end
207
+ end
208
+ end
174
209
  end
175
210
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- hash: 3
4
+ hash: 1
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 6
9
- - 2
10
- version: 0.6.2
9
+ - 3
10
+ version: 0.6.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,8 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-10-03 00:00:00 +02:00
19
- default_executable: get_ppis.rb
18
+ date: 2012-02-09 00:00:00 Z
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
21
  name: rbbt-util
@@ -106,6 +105,7 @@ files:
106
105
  - lib/rbbt/corpus/document.rb
107
106
  - lib/rbbt/corpus/document_repo.rb
108
107
  - lib/rbbt/corpus/sources/pubmed.rb
108
+ - lib/rbbt/entity/document.rb
109
109
  - lib/rbbt/ner/NER.rb
110
110
  - lib/rbbt/ner/abner.rb
111
111
  - lib/rbbt/ner/banner.rb
@@ -161,7 +161,6 @@ files:
161
161
  - test/rbbt/corpus/test_corpus.rb
162
162
  - test/rbbt/corpus/test_document.rb
163
163
  - bin/get_ppis.rb
164
- has_rdoc: true
165
164
  homepage: http://github.com/mikisvaz/rbbt-util
166
165
  licenses: []
167
166
 
@@ -191,7 +190,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
191
190
  requirements: []
192
191
 
193
192
  rubyforge_project:
194
- rubygems_version: 1.6.2
193
+ rubygems_version: 1.8.10
195
194
  signing_key:
196
195
  specification_version: 3
197
196
  summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)