rbbt-text 1.4.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 581a8bf4e03fad79e2650c65ac6c445d83f89a1d237114c91e8ba37b11c1c4f4
4
- data.tar.gz: 58f2fb21eee7ac37ca30a771609f6b0c394a2b646690fbcd59b0d623261e0522
3
+ metadata.gz: 2a1a38b7a9c9f9fe0ce8fd7b8fd19a3ca39f483e8ccaaa1412af023262181fd8
4
+ data.tar.gz: ddec2f95b5c6fe9a69e67cf79a29c3ef2d9c37301e442d8664c7bbff1298a365
5
5
  SHA512:
6
- metadata.gz: 646340e7dc850bbe4232f30e947f68b8801b51b6d3e0ded92f378534459993cf08c21885685fe4e11a171026f0b0f09d331fdbdc70d97e81579f0ad53f886ee2
7
- data.tar.gz: 68b0095f69e08562a22763201c8175e5f6e10d9106118f9dd3b2920a0ae63111f6296dd03b66fd1ab6a5672898a8c4e87d2ca5190671e60ad44b9ea0e6dab78c
6
+ metadata.gz: b554f4db313a65e0b682e2fd4456aaf9d1b4a489ca025ad3dc89cf71df772d97807b8b2ec62e3753eb62cf93bd88d7d0555c7ca5f9d76bd52a6c6d5fd56b313d
7
+ data.tar.gz: c419af9eb52723c0a761ddc025aadad99cebd1891cbd178aff466d509b84fdf76f449dfa62ba574a0b9dbeb0c519a4341b53375f18c09e52d19fd912cf5c1188
data/lib/rbbt/segment.rb CHANGED
@@ -167,15 +167,29 @@ module Segment
167
167
  offset = text.index part
168
168
  next if offset.nil?
169
169
  Segment.setup(part, pre_offset + offset, docid)
170
- pre_offset += offset + part.segment_length - 1
171
- text = text[(offset + part.segment_length - 1)..-1]
170
+ pre_offset += offset + part.segment_length
171
+ text = text[(offset + part.segment_length)..-1]
172
+ end
173
+ end
174
+
175
+ def self.relocate(segment, original, target, pad = 20)
176
+ if segment != target[segment.range]
177
+ start_pad = [pad, segment.offset].min
178
+ end_pad = [pad, original.length - segment.end].min
179
+ start = segment.offset - start_pad
180
+ eend = segment.end + end_pad
181
+
182
+ context = original[start..eend].gsub(/\s/,' ')
183
+ target = target.gsub(/\s/, ' ')
184
+ i = target.index context
185
+ raise "Context not found in original text" if i.nil?
186
+ segment.offset = i + start_pad
172
187
  end
173
188
  end
174
189
 
175
190
  def self.index(*args)
176
191
  Segment::RangeIndex.index(*args)
177
192
  end
178
-
179
193
  end
180
194
 
181
195
  require 'rbbt/segment/range_index'
@@ -134,6 +134,39 @@ Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors of
134
134
  assert_equal parts.first.docid, text.docid
135
135
  end
136
136
 
137
+ def test_align_parts
138
+ text =<<-EOF
139
+ aabbccdd
140
+ EOF
141
+
142
+ parts = %w(aa bb cc dd)
143
+ Segment.align(text, parts)
144
+
145
+ parts.each do |p|
146
+ assert_equal p, text[p.range]
147
+ end
148
+ end
149
+
150
+
151
+ def test_relocate
152
+ original =<<-EOF
153
+ This sentences contains
154
+ a mention to gene TP53
155
+ This is a followup sentence
156
+ EOF
157
+
158
+ target = <<-EOF
159
+ This sentence is added before
160
+ This sentences contains a mention to gene TP53
161
+ This is a followup sentence
162
+ EOF
163
+
164
+ segment = Segment.setup("TP53")
165
+ Segment.align(original, [segment])
166
+ Segment.relocate(segment, original, target)
167
+ assert_equal segment, target[segment.range]
168
+ end
169
+
137
170
  def test_segment_index
138
171
  text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
139
172
  Document.setup(text, "TEST", "test_doc1", nil)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-21 00:00:00.000000000 Z
11
+ date: 2024-02-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util