rbbt-text 1.4.0 → 1.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/segment.rb +17 -3
- data/test/rbbt/test_segment.rb +33 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2a1a38b7a9c9f9fe0ce8fd7b8fd19a3ca39f483e8ccaaa1412af023262181fd8
|
4
|
+
data.tar.gz: ddec2f95b5c6fe9a69e67cf79a29c3ef2d9c37301e442d8664c7bbff1298a365
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b554f4db313a65e0b682e2fd4456aaf9d1b4a489ca025ad3dc89cf71df772d97807b8b2ec62e3753eb62cf93bd88d7d0555c7ca5f9d76bd52a6c6d5fd56b313d
|
7
|
+
data.tar.gz: c419af9eb52723c0a761ddc025aadad99cebd1891cbd178aff466d509b84fdf76f449dfa62ba574a0b9dbeb0c519a4341b53375f18c09e52d19fd912cf5c1188
|
data/lib/rbbt/segment.rb
CHANGED
@@ -167,15 +167,29 @@ module Segment
|
|
167
167
|
offset = text.index part
|
168
168
|
next if offset.nil?
|
169
169
|
Segment.setup(part, pre_offset + offset, docid)
|
170
|
-
pre_offset += offset + part.segment_length
|
171
|
-
text = text[(offset + part.segment_length
|
170
|
+
pre_offset += offset + part.segment_length
|
171
|
+
text = text[(offset + part.segment_length)..-1]
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
def self.relocate(segment, original, target, pad = 20)
|
176
|
+
if segment != target[segment.range]
|
177
|
+
start_pad = [pad, segment.offset].min
|
178
|
+
end_pad = [pad, original.length - segment.end].min
|
179
|
+
start = segment.offset - start_pad
|
180
|
+
eend = segment.end + end_pad
|
181
|
+
|
182
|
+
context = original[start..eend].gsub(/\s/,' ')
|
183
|
+
target = target.gsub(/\s/, ' ')
|
184
|
+
i = target.index context
|
185
|
+
raise "Context not found in original text" if i.nil?
|
186
|
+
segment.offset = i + start_pad
|
172
187
|
end
|
173
188
|
end
|
174
189
|
|
175
190
|
def self.index(*args)
|
176
191
|
Segment::RangeIndex.index(*args)
|
177
192
|
end
|
178
|
-
|
179
193
|
end
|
180
194
|
|
181
195
|
require 'rbbt/segment/range_index'
|
data/test/rbbt/test_segment.rb
CHANGED
@@ -134,6 +134,39 @@ Atypical teratoid/rhabdoid tumors (AT/RTs) are highly aggressive brain tumors of
|
|
134
134
|
assert_equal parts.first.docid, text.docid
|
135
135
|
end
|
136
136
|
|
137
|
+
def test_align_parts
|
138
|
+
text =<<-EOF
|
139
|
+
aabbccdd
|
140
|
+
EOF
|
141
|
+
|
142
|
+
parts = %w(aa bb cc dd)
|
143
|
+
Segment.align(text, parts)
|
144
|
+
|
145
|
+
parts.each do |p|
|
146
|
+
assert_equal p, text[p.range]
|
147
|
+
end
|
148
|
+
end
|
149
|
+
|
150
|
+
|
151
|
+
def test_relocate
|
152
|
+
original =<<-EOF
|
153
|
+
This sentences contains
|
154
|
+
a mention to gene TP53
|
155
|
+
This is a followup sentence
|
156
|
+
EOF
|
157
|
+
|
158
|
+
target = <<-EOF
|
159
|
+
This sentence is added before
|
160
|
+
This sentences contains a mention to gene TP53
|
161
|
+
This is a followup sentence
|
162
|
+
EOF
|
163
|
+
|
164
|
+
segment = Segment.setup("TP53")
|
165
|
+
Segment.align(original, [segment])
|
166
|
+
Segment.relocate(segment, original, target)
|
167
|
+
assert_equal segment, target[segment.range]
|
168
|
+
end
|
169
|
+
|
137
170
|
def test_segment_index
|
138
171
|
text = "This sentence mentions the TP53 gene and the CDK5R1 protein"
|
139
172
|
Document.setup(text, "TEST", "test_doc1", nil)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-02-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|