rbbt-text 1.4.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/segment.rb +15 -1
- data/test/rbbt/test_segment.rb +19 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d969cb752d1e7bc80458663d989ca4f58a2e134b7f708748dcf8383ca44f01d3
|
4
|
+
data.tar.gz: b0df4c7e9bb43f47b6031965b70a21ea9e3c7a12e012747d38776ad785f580e3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c7436bae2f407303bb81b812b586ec09bb97a70b2272f386f6bd574b257cde6a22789b362a22dd89546761c38147214423e2e53e58ff73ae6553630e34e2f6d2
|
7
|
+
data.tar.gz: f60cfc48e60112b2639d182eda684bed33f9249e1aeabdef220b77af05f4b77eb07f6f55d9e980c8372f2030e126847369c5a80dded711217b3fd78520a00db6
|
data/lib/rbbt/segment.rb
CHANGED
@@ -172,10 +172,24 @@ module Segment
|
|
172
172
|
end
|
173
173
|
end
|
174
174
|
|
175
|
+
# Re-anchors +segment+ (positioned against +original+) onto +target+.
#
# If the text found in +target+ at the segment's current range already
# equals the segment, nothing is changed and nil is returned. Otherwise a
# context window is cut from +original+: up to +pad+ characters before the
# segment's offset and up to +pad+ characters after +segment.end+. That
# window is searched for in +target+ and the segment's offset is updated to
# point at the segment inside the first match.
#
# Every whitespace character is replaced by a single space in both the
# context and the target before searching, so texts that differ only in
# the kind of whitespace (e.g. line breaks vs. spaces) still match. The
# replacement is one-for-one, so character positions are preserved.
#
# segment  - object responding to offset, offset=, end and range
#            (presumably a Segment-annotated String; confirm with callers).
# original - text the segment's current offset refers to.
# target   - text the segment should be relocated into.
# pad      - maximum number of context characters taken on each side.
#
# Returns the new offset, or nil when the segment was already aligned.
# Raises RuntimeError when the context cannot be found in +target+.
def self.relocate(segment, original, target, pad = 20)
  return if segment == target[segment.range]

  # Clamp the padding so the window never starts before the beginning of
  # the original text nor asks for more trailing text than is available.
  start_pad = [pad, segment.offset].min
  end_pad = [pad, original.length - segment.end].min
  start = segment.offset - start_pad
  eend = segment.end + end_pad

  context = original[start..eend].gsub(/\s/, ' ')
  normalized_target = target.gsub(/\s/, ' ')

  i = normalized_target.index context
  # The lookup runs against the target text, so report it as such.
  raise "Context not found in target text" if i.nil?

  # The segment sits start_pad characters into the matched context.
  segment.offset = i + start_pad
end
|
189
|
+
|
175
190
|
# Convenience entry point: hands every positional argument over to
# Segment::RangeIndex.index and returns whatever that call returns.
def self.index(*arguments)
  Segment::RangeIndex.index(*arguments)
end
|
178
|
-
|
179
193
|
end
|
180
194
|
|
181
195
|
require 'rbbt/segment/range_index'
|
data/test/rbbt/test_segment.rb
CHANGED
@@ -134,6 +134,25 @@ Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors of
|
|
134
134
|
assert_equal parts.first.docid, text.docid
|
135
135
|
end
|
136
136
|
|
137
|
+
# Aligns a "TP53" segment against one text, relocates it into a second text
# that has an extra leading sentence and different line wrapping, and checks
# that the segment's range in the second text still covers the mention.
def test_relocate
  source_text = <<-EOF
This sentences contains
a mention to gene TP53
This is a followup sentence
  EOF

  destination_text = <<-EOF
This sentence is added before
This sentences contains a mention to gene TP53
This is a followup sentence
  EOF

  mention = Segment.setup("TP53")
  Segment.align(source_text, [mention])
  Segment.relocate(mention, source_text, destination_text)

  assert_equal mention, destination_text[mention.range]
end
|
155
|
+
|
137
156
|
def test_segment_index
|
138
157
|
text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
139
158
|
Document.setup(text, "TEST", "test_doc1", nil)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-02-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|