rbbt-text 1.5.2 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/document/annotation.rb +3 -2
- data/lib/rbbt/document/corpus.rb +5 -2
- data/lib/rbbt/document.rb +1 -1
- data/lib/rbbt/ner/pubtator.rb +0 -1
- data/lib/rbbt/ner/token_trieNER.rb +3 -3
- data/lib/rbbt/segment/annotation.rb +1 -1
- data/test/rbbt/ner/test_pubtator.rb +2 -2
- data/test/rbbt/ner/test_rnorm.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fe1271c362c3e5f912d1d2236587dfd3f5202c8bef0246b547f3d109d3a321b5
|
4
|
+
data.tar.gz: abbd7559a60012f910a7d5c528484bf10950109772c6fd81cffba85efc0541bd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf1b61c0464eb675861694e21c597f0770b37b241e72008225d49e826ddeb93877c50ceaa63b5a05daee1274d9ce3b433308d32ff71fe9455e14af1c55ac7eca
|
7
|
+
data.tar.gz: 532381c12fc4886b2c178252c1a5d2df6cd45921e1725a8dc1a45f4325044710822ab67fe431e361bb7b14159d92058deeb7257c52f0f86e94fd88f851669933
|
data/lib/rbbt/document/annotation.rb
CHANGED
@@ -3,8 +3,8 @@ require 'rbbt/segment/annotation'
|
|
3
3
|
|
4
4
|
module Document
|
5
5
|
def self.define(type, &block)
|
6
|
-
send :property, type do
|
7
|
-
segments = self.instance_exec &block
|
6
|
+
send :property, type do |*args,**kwargs|
|
7
|
+
segments = self.instance_exec *args, **kwargs, &block
|
8
8
|
|
9
9
|
Segment.align(self, segments) unless segments.empty? ||
|
10
10
|
(Segment === segments && segments.offset) ||
|
@@ -53,6 +53,7 @@ module Document
|
|
53
53
|
|
54
54
|
def self.define_multiple(type, &block)
|
55
55
|
send :property, type => :multiple do |list|
|
56
|
+
list = self if Array === self
|
56
57
|
doc_segments = self.instance_exec list, &block
|
57
58
|
|
58
59
|
doc_segments = doc_segments.chunked_values_at(list) if Hash === doc_segments
|
data/lib/rbbt/document/corpus.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'rbbt-util'
|
2
|
+
require 'rbbt/tsv'
|
2
3
|
|
3
4
|
module Document::Corpus
|
4
5
|
|
@@ -12,7 +13,9 @@ module Document::Corpus
|
|
12
13
|
|
13
14
|
def add_document(document)
|
14
15
|
docid = document.docid
|
15
|
-
|
16
|
+
self.read_and_close do
|
17
|
+
return self[docid] if self.include?(docid)
|
18
|
+
end
|
16
19
|
self.write_and_close do
|
17
20
|
self[docid] = document
|
18
21
|
end
|
@@ -34,7 +37,7 @@ module Document::Corpus
|
|
34
37
|
def [](*args)
|
35
38
|
docid, *rest = args
|
36
39
|
|
37
|
-
res = self.
|
40
|
+
res = self.read_and_close do
|
38
41
|
super(*args)
|
39
42
|
end
|
40
43
|
|
data/lib/rbbt/document.rb
CHANGED
data/lib/rbbt/ner/pubtator.rb
CHANGED
data/lib/rbbt/ner/token_trieNER.rb
CHANGED
@@ -266,8 +266,8 @@ class TokenTrieNER < NER
|
|
266
266
|
file = [] if file.nil?
|
267
267
|
file = [file] unless Array === file
|
268
268
|
persist_options = Misc.pull_keys options, :persist
|
269
|
-
@index = Persist.persist_tsv(file, options, persist_options) do |data|
|
270
|
-
data.serializer = :marshal if data.respond_to?
|
269
|
+
@index = Persist.persist_tsv(file, nil, options, persist_options) do |data|
|
270
|
+
data.serializer = :marshal if data.respond_to?(:serializer) and (data.serializer == :type || data.serializer.nil?)
|
271
271
|
|
272
272
|
@index = data
|
273
273
|
file.each do |f|
|
@@ -282,7 +282,7 @@ class TokenTrieNER < NER
|
|
282
282
|
case
|
283
283
|
when TokenTrieNER === new
|
284
284
|
Log.debug "TokenTrieNER merging other TokenTrieNER"
|
285
|
-
|
285
|
+
TokenTrieNER.merge(@index, new.index)
|
286
286
|
when TSV === new
|
287
287
|
Log.debug "TokenTrieNER merging TSV"
|
288
288
|
new.with_unnamed do
|
data/test/rbbt/ner/test_pubtator.rb
CHANGED
@@ -12,7 +12,7 @@ class TestPubtator < Test::Unit::TestCase
|
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
|
-
def
|
15
|
+
def test_align
|
16
16
|
pmids = "19522013|20861254|38267746".split("|")
|
17
17
|
alignments = {}
|
18
18
|
with_corpus do |corpus|
|
@@ -47,7 +47,7 @@ class TestPubtator < Test::Unit::TestCase
|
|
47
47
|
end
|
48
48
|
end
|
49
49
|
|
50
|
-
def
|
50
|
+
def test_greek
|
51
51
|
|
52
52
|
pmids = "20861254".split("|")
|
53
53
|
alignments = {}
|
data/test/rbbt/ner/test_rnorm.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.2
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-05-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -179,7 +179,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
179
179
|
- !ruby/object:Gem::Version
|
180
180
|
version: '0'
|
181
181
|
requirements: []
|
182
|
-
rubygems_version: 3.5.
|
182
|
+
rubygems_version: 3.5.10
|
183
183
|
signing_key:
|
184
184
|
specification_version: 4
|
185
185
|
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|