rbbt-text 1.4.0 → 1.5.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 581a8bf4e03fad79e2650c65ac6c445d83f89a1d237114c91e8ba37b11c1c4f4
4
- data.tar.gz: 58f2fb21eee7ac37ca30a771609f6b0c394a2b646690fbcd59b0d623261e0522
3
+ metadata.gz: 2a1a38b7a9c9f9fe0ce8fd7b8fd19a3ca39f483e8ccaaa1412af023262181fd8
4
+ data.tar.gz: ddec2f95b5c6fe9a69e67cf79a29c3ef2d9c37301e442d8664c7bbff1298a365
5
5
  SHA512:
6
- metadata.gz: 646340e7dc850bbe4232f30e947f68b8801b51b6d3e0ded92f378534459993cf08c21885685fe4e11a171026f0b0f09d331fdbdc70d97e81579f0ad53f886ee2
7
- data.tar.gz: 68b0095f69e08562a22763201c8175e5f6e10d9106118f9dd3b2920a0ae63111f6296dd03b66fd1ab6a5672898a8c4e87d2ca5190671e60ad44b9ea0e6dab78c
6
+ metadata.gz: b554f4db313a65e0b682e2fd4456aaf9d1b4a489ca025ad3dc89cf71df772d97807b8b2ec62e3753eb62cf93bd88d7d0555c7ca5f9d76bd52a6c6d5fd56b313d
7
+ data.tar.gz: c419af9eb52723c0a761ddc025aadad99cebd1891cbd178aff466d509b84fdf76f449dfa62ba574a0b9dbeb0c519a4341b53375f18c09e52d19fd912cf5c1188
data/lib/rbbt/segment.rb CHANGED
@@ -167,15 +167,29 @@ module Segment
167
167
  offset = text.index part
168
168
  next if offset.nil?
169
169
  Segment.setup(part, pre_offset + offset, docid)
170
- pre_offset += offset + part.segment_length - 1
171
- text = text[(offset + part.segment_length - 1)..-1]
170
+ pre_offset += offset + part.segment_length
171
+ text = text[(offset + part.segment_length)..-1]
172
+ end
173
+ end
174
+
175
+ def self.relocate(segment, original, target, pad = 20)
176
+ if segment != target[segment.range]
177
+ start_pad = [pad, segment.offset].min
178
+ end_pad = [pad, original.length - segment.end].min
179
+ start = segment.offset - start_pad
180
+ eend = segment.end + end_pad
181
+
182
+ context = original[start..eend].gsub(/\s/,' ')
183
+ target = target.gsub(/\s/, ' ')
184
+ i = target.index context
185
+ raise "Context not found in original text" if i.nil?
186
+ segment.offset = i + start_pad
172
187
  end
173
188
  end
174
189
 
175
190
  def self.index(*args)
176
191
  Segment::RangeIndex.index(*args)
177
192
  end
178
-
179
193
  end
180
194
 
181
195
  require 'rbbt/segment/range_index'
@@ -134,6 +134,39 @@ Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors of
134
134
  assert_equal parts.first.docid, text.docid
135
135
  end
136
136
 
137
+ def test_align_parts
138
+ text =<<-EOF
139
+ aabbccdd
140
+ EOF
141
+
142
+ parts = %w(aa bb cc dd)
143
+ Segment.align(text, parts)
144
+
145
+ parts.each do |p|
146
+ assert_equal p, text[p.range]
147
+ end
148
+ end
149
+
150
+
151
+ def test_relocate
152
+ original =<<-EOF
153
+ This sentences contains
154
+ a mention to gene TP53
155
+ This is a followup sentence
156
+ EOF
157
+
158
+ target = <<-EOF
159
+ This sentence is added before
160
+ This sentences contains a mention to gene TP53
161
+ This is a followup sentence
162
+ EOF
163
+
164
+ segment = Segment.setup("TP53")
165
+ Segment.align(original, [segment])
166
+ Segment.relocate(segment, original, target)
167
+ assert_equal segment, target[segment.range]
168
+ end
169
+
137
170
  def test_segment_index
138
171
  text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
139
172
  Document.setup(text, "TEST", "test_doc1", nil)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-21 00:00:00.000000000 Z
11
+ date: 2024-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util