rbbt-text 1.4.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/segment.rb +17 -3
- data/test/rbbt/test_segment.rb +33 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2a1a38b7a9c9f9fe0ce8fd7b8fd19a3ca39f483e8ccaaa1412af023262181fd8
|
4
|
+
data.tar.gz: ddec2f95b5c6fe9a69e67cf79a29c3ef2d9c37301e442d8664c7bbff1298a365
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b554f4db313a65e0b682e2fd4456aaf9d1b4a489ca025ad3dc89cf71df772d97807b8b2ec62e3753eb62cf93bd88d7d0555c7ca5f9d76bd52a6c6d5fd56b313d
|
7
|
+
data.tar.gz: c419af9eb52723c0a761ddc025aadad99cebd1891cbd178aff466d509b84fdf76f449dfa62ba574a0b9dbeb0c519a4341b53375f18c09e52d19fd912cf5c1188
|
data/lib/rbbt/segment.rb
CHANGED
@@ -167,15 +167,29 @@ module Segment
|
|
167
167
|
offset = text.index part
|
168
168
|
next if offset.nil?
|
169
169
|
Segment.setup(part, pre_offset + offset, docid)
|
170
|
-
pre_offset += offset + part.segment_length
|
171
|
-
text = text[(offset + part.segment_length
|
170
|
+
pre_offset += offset + part.segment_length
|
171
|
+
text = text[(offset + part.segment_length)..-1]
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
def self.relocate(segment, original, target, pad = 20)
|
176
|
+
if segment != target[segment.range]
|
177
|
+
start_pad = [pad, segment.offset].min
|
178
|
+
end_pad = [pad, original.length - segment.end].min
|
179
|
+
start = segment.offset - start_pad
|
180
|
+
eend = segment.end + end_pad
|
181
|
+
|
182
|
+
context = original[start..eend].gsub(/\s/,' ')
|
183
|
+
target = target.gsub(/\s/, ' ')
|
184
|
+
i = target.index context
|
185
|
+
raise "Context not found in original text" if i.nil?
|
186
|
+
segment.offset = i + start_pad
|
172
187
|
end
|
173
188
|
end
|
174
189
|
|
175
190
|
def self.index(*args)
|
176
191
|
Segment::RangeIndex.index(*args)
|
177
192
|
end
|
178
|
-
|
179
193
|
end
|
180
194
|
|
181
195
|
require 'rbbt/segment/range_index'
|
data/test/rbbt/test_segment.rb
CHANGED
@@ -134,6 +134,39 @@ Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors of
|
|
134
134
|
assert_equal parts.first.docid, text.docid
|
135
135
|
end
|
136
136
|
|
137
|
+
def test_align_parts
|
138
|
+
text =<<-EOF
|
139
|
+
aabbccdd
|
140
|
+
EOF
|
141
|
+
|
142
|
+
parts = %w(aa bb cc dd)
|
143
|
+
Segment.align(text, parts)
|
144
|
+
|
145
|
+
parts.each do |p|
|
146
|
+
assert_equal p, text[p.range]
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
|
151
|
+
def test_relocate
|
152
|
+
original =<<-EOF
|
153
|
+
This sentences contains
|
154
|
+
a mention to gene TP53
|
155
|
+
This is a followup sentence
|
156
|
+
EOF
|
157
|
+
|
158
|
+
target = <<-EOF
|
159
|
+
This sentence is added before
|
160
|
+
This sentences contains a mention to gene TP53
|
161
|
+
This is a followup sentence
|
162
|
+
EOF
|
163
|
+
|
164
|
+
segment = Segment.setup("TP53")
|
165
|
+
Segment.align(original, [segment])
|
166
|
+
Segment.relocate(segment, original, target)
|
167
|
+
assert_equal segment, target[segment.range]
|
168
|
+
end
|
169
|
+
|
137
170
|
def test_segment_index
|
138
171
|
text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
139
172
|
Document.setup(text, "TEST", "test_doc1", nil)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|