rbbt-text 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/segment.rb +15 -1
- data/test/rbbt/test_segment.rb +19 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d969cb752d1e7bc80458663d989ca4f58a2e134b7f708748dcf8383ca44f01d3
|
4
|
+
data.tar.gz: b0df4c7e9bb43f47b6031965b70a21ea9e3c7a12e012747d38776ad785f580e3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c7436bae2f407303bb81b812b586ec09bb97a70b2272f386f6bd574b257cde6a22789b362a22dd89546761c38147214423e2e53e58ff73ae6553630e34e2f6d2
|
7
|
+
data.tar.gz: f60cfc48e60112b2639d182eda684bed33f9249e1aeabdef220b77af05f4b77eb07f6f55d9e980c8372f2030e126847369c5a80dded711217b3fd78520a00db6
|
data/lib/rbbt/segment.rb
CHANGED
@@ -172,10 +172,24 @@ module Segment
|
|
172
172
|
end
|
173
173
|
end
|
174
174
|
|
175
|
+
# Re-anchors +segment+ inside +target+ when the text found at the segment's
# current range in +target+ is not equal to the segment itself.
#
# A window of up to +pad+ characters on each side of the segment is cut out
# of +original+. Every whitespace character in that window and in +target+
# is replaced by a single space (a one-for-one substitution, so character
# positions are preserved), and the window is then searched for in the
# normalised +target+. The first occurrence determines the new offset.
#
# segment  - object comparable to a String that responds to #offset,
#            #offset=, #end and #range.
#            NOTE(review): presumably a Segment-annotated String whose
#            offset refers to +original+ -- confirm against callers.
# original - text the segment's offset currently refers to.
# target   - text in which the segment should be located.
# pad      - maximum number of context characters taken on each side
#            (default 20).
#
# Returns nil when the segment already matches +target+ at its range,
# otherwise the newly assigned offset.
# Raises RuntimeError when the context window does not occur in +target+.
def self.relocate(segment, original, target, pad = 20)
  return if segment == target[segment.range]

  # Never ask for more leading context than exists before the segment, nor
  # more trailing context than the length of +original+ allows.
  start_pad = [pad, segment.offset].min
  end_pad = [pad, original.length - segment.end].min
  window = (segment.offset - start_pad)..(segment.end + end_pad)

  context = original[window].gsub(/\s/, ' ')
  position = target.gsub(/\s/, ' ').index(context)
  # The lookup is performed against +target+, so report it as such.
  raise "Context not found in target text" if position.nil?

  # The segment starts +start_pad+ characters into the matched window.
  segment.offset = position + start_pad
end
|
189
|
+
|
175
190
|
# Convenience entry point: passes every positional argument straight through
# to Segment::RangeIndex.index and returns whatever that call returns.
def self.index(*arguments)
  Segment::RangeIndex.index(*arguments)
end
|
178
|
-
|
179
193
|
end
|
180
194
|
|
181
195
|
require 'rbbt/segment/range_index'
|
data/test/rbbt/test_segment.rb
CHANGED
@@ -134,6 +134,25 @@ Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors of
|
|
134
134
|
assert_equal parts.first.docid, text.docid
|
135
135
|
end
|
136
136
|
|
137
|
+
# Exercises Segment.relocate: a "TP53" segment is set up and passed through
# Segment.align with the source text, then relocated into a copy of the text
# that has an extra leading sentence and a line break replaced by a space.
# Afterwards the segment's range must select an equal string in that copy.
def test_relocate
  source_text = <<~EOF
    This sentences contains
    a mention to gene TP53
    This is a followup sentence
  EOF

  shifted_text = <<~EOF
    This sentence is added before
    This sentences contains a mention to gene TP53
    This is a followup sentence
  EOF

  mention = Segment.setup("TP53")
  Segment.align(source_text, [mention])
  Segment.relocate(mention, source_text, shifted_text)

  assert_equal mention, shifted_text[mention.range]
end
|
155
|
+
|
137
156
|
def test_segment_index
|
138
157
|
text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
139
158
|
Document.setup(text, "TEST", "test_doc1", nil)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-02-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|