rbbt-text 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 581a8bf4e03fad79e2650c65ac6c445d83f89a1d237114c91e8ba37b11c1c4f4
4
- data.tar.gz: 58f2fb21eee7ac37ca30a771609f6b0c394a2b646690fbcd59b0d623261e0522
3
+ metadata.gz: d969cb752d1e7bc80458663d989ca4f58a2e134b7f708748dcf8383ca44f01d3
4
+ data.tar.gz: b0df4c7e9bb43f47b6031965b70a21ea9e3c7a12e012747d38776ad785f580e3
5
5
  SHA512:
6
- metadata.gz: 646340e7dc850bbe4232f30e947f68b8801b51b6d3e0ded92f378534459993cf08c21885685fe4e11a171026f0b0f09d331fdbdc70d97e81579f0ad53f886ee2
7
- data.tar.gz: 68b0095f69e08562a22763201c8175e5f6e10d9106118f9dd3b2920a0ae63111f6296dd03b66fd1ab6a5672898a8c4e87d2ca5190671e60ad44b9ea0e6dab78c
6
+ metadata.gz: c7436bae2f407303bb81b812b586ec09bb97a70b2272f386f6bd574b257cde6a22789b362a22dd89546761c38147214423e2e53e58ff73ae6553630e34e2f6d2
7
+ data.tar.gz: f60cfc48e60112b2639d182eda684bed33f9249e1aeabdef220b77af05f4b77eb07f6f55d9e980c8372f2030e126847369c5a80dded711217b3fd78520a00db6
data/lib/rbbt/segment.rb CHANGED
@@ -172,10 +172,24 @@ module Segment
172
172
  end
173
173
  end
174
174
 
175
# Moves +segment+ so that its offset points at the same passage inside
# +target+, a text expected to contain the stretch of +original+ that
# surrounds the segment, possibly at a different position or with
# different line breaks.
#
# The passage is located by taking up to +pad+ characters of +original+ on
# each side of the segment, normalizing whitespace, and searching for that
# context in +target+. The first occurrence wins.
#
# @param segment  object responding to +offset+, +offset=+, +end+ and
#                 +range+, positioned with respect to +original+
# @param original [String] text the segment offsets currently refer to
# @param target   [String] text the segment should be re-anchored to
# @param pad      [Integer] maximum number of context characters taken on
#                 each side of the segment (default 20)
# @return [Integer, nil] the new offset, or nil when the segment already
#                 matches +target+ at its current range and nothing is changed
# @raise [RuntimeError] when the context cannot be found in +target+
def self.relocate(segment, original, target, pad = 20)
  # Already in place: the target text under the segment's range is the segment itself.
  return if segment == target[segment.range]

  # Clamp the padding so the context window does not start before the
  # beginning of +original+ nor extend needlessly past its end.
  start_pad = [pad, segment.offset].min
  end_pad = [pad, original.length - segment.end].min
  context_start = segment.offset - start_pad
  context_end = segment.end + end_pad

  # Each whitespace character becomes exactly one space, which keeps all
  # positions intact while letting a line break in one text match a space
  # in the other.
  context = original[context_start..context_end].gsub(/\s/, ' ')
  normalized_target = target.gsub(/\s/, ' ')

  position = normalized_target.index(context)
  # The context is cut from +original+ and looked up in +target+, so a miss
  # means the target lacks it.
  raise "Context not found in target text" if position.nil?

  # The segment starts +start_pad+ characters into the matched context.
  segment.offset = position + start_pad
end
189
+
175
190
  def self.index(*args)
176
191
  Segment::RangeIndex.index(*args)
177
192
  end
178
-
179
193
  end
180
194
 
181
195
  require 'rbbt/segment/range_index'
@@ -134,6 +134,25 @@ Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors of
134
134
  assert_equal parts.first.docid, text.docid
135
135
  end
136
136
 
137
# Checks Segment.relocate: a "TP53" segment is aligned against +original+,
# relocated to +target+ (which has an extra leading sentence and joins the
# first two lines of +original+ into one), and must then equal the text
# that its range selects in +target+.
+ def test_relocate
138
+ original =<<-EOF
139
+ This sentences contains
140
+ a mention to gene TP53
141
+ This is a followup sentence
142
+ EOF
143
+
144
+ target = <<-EOF
145
+ This sentence is added before
146
+ This sentences contains a mention to gene TP53
147
+ This is a followup sentence
148
+ EOF
149
+
150
+ segment = Segment.setup("TP53")
151
+ Segment.align(original, [segment])
152
+ Segment.relocate(segment, original, target)
153
+ assert_equal segment, target[segment.range]
154
+ end
155
+
137
156
  def test_segment_index
138
157
  text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
139
158
  Document.setup(text, "TEST", "test_doc1", nil)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-21 00:00:00.000000000 Z
11
+ date: 2024-02-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util