rbbt-text 1.4.0 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 581a8bf4e03fad79e2650c65ac6c445d83f89a1d237114c91e8ba37b11c1c4f4
4
- data.tar.gz: 58f2fb21eee7ac37ca30a771609f6b0c394a2b646690fbcd59b0d623261e0522
3
+ metadata.gz: d969cb752d1e7bc80458663d989ca4f58a2e134b7f708748dcf8383ca44f01d3
4
+ data.tar.gz: b0df4c7e9bb43f47b6031965b70a21ea9e3c7a12e012747d38776ad785f580e3
5
5
  SHA512:
6
- metadata.gz: 646340e7dc850bbe4232f30e947f68b8801b51b6d3e0ded92f378534459993cf08c21885685fe4e11a171026f0b0f09d331fdbdc70d97e81579f0ad53f886ee2
7
- data.tar.gz: 68b0095f69e08562a22763201c8175e5f6e10d9106118f9dd3b2920a0ae63111f6296dd03b66fd1ab6a5672898a8c4e87d2ca5190671e60ad44b9ea0e6dab78c
6
+ metadata.gz: c7436bae2f407303bb81b812b586ec09bb97a70b2272f386f6bd574b257cde6a22789b362a22dd89546761c38147214423e2e53e58ff73ae6553630e34e2f6d2
7
+ data.tar.gz: f60cfc48e60112b2639d182eda684bed33f9249e1aeabdef220b77af05f4b77eb07f6f55d9e980c8372f2030e126847369c5a80dded711217b3fd78520a00db6
data/lib/rbbt/segment.rb CHANGED
@@ -172,10 +172,24 @@ module Segment
172
172
  end
173
173
  end
174
174
 
175
+ def self.relocate(segment, original, target, pad = 20)
176
+ if segment != target[segment.range]
177
+ start_pad = [pad, segment.offset].min
178
+ end_pad = [pad, original.length - segment.end].min
179
+ start = segment.offset - start_pad
180
+ eend = segment.end + end_pad
181
+
182
+ context = original[start..eend].gsub(/\s/,' ')
183
+ target = target.gsub(/\s/, ' ')
184
+ i = target.index context
185
+ raise "Context not found in original text" if i.nil?
186
+ segment.offset = i + start_pad
187
+ end
188
+ end
189
+
175
190
  def self.index(*args)
176
191
  Segment::RangeIndex.index(*args)
177
192
  end
178
-
179
193
  end
180
194
 
181
195
  require 'rbbt/segment/range_index'
@@ -134,6 +134,25 @@ Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors of
134
134
  assert_equal parts.first.docid, text.docid
135
135
  end
136
136
 
137
+ def test_relocate
138
+ original =<<-EOF
139
+ This sentences contains
140
+ a mention to gene TP53
141
+ This is a followup sentence
142
+ EOF
143
+
144
+ target = <<-EOF
145
+ This sentence is added before
146
+ This sentences contains a mention to gene TP53
147
+ This is a followup sentence
148
+ EOF
149
+
150
+ segment = Segment.setup("TP53")
151
+ Segment.align(original, [segment])
152
+ Segment.relocate(segment, original, target)
153
+ assert_equal segment, target[segment.range]
154
+ end
155
+
137
156
  def test_segment_index
138
157
  text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
139
158
  Document.setup(text, "TEST", "test_doc1", nil)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-text
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-12-21 00:00:00.000000000 Z
11
+ date: 2024-02-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util