rbbt-text 1.3.7 → 1.3.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/document/corpus/pubmed.rb +13 -5
- data/lib/rbbt/nlp/spaCy.rb +2 -2
- data/test/rbbt/document/corpus/test_pubmed.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e1b07b2646ecdc51599e2a2356fd18708e88d819944910a07930f67ec3fc012d
|
4
|
+
data.tar.gz: 03bcbe61f41d830668b50fcfc253fa2b43285774040f61fb3fb0a58f80e9dfd3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ae6de2dd809642ca38276ff82e243efeb193cf432bc78aea92e772ab21ff489f23224b9e93de726dcacdb06910716f1107171433cc39e7b022ba14ee4ed284f6
|
7
|
+
data.tar.gz: 82768060a28248d459031030b6ba49b500b63a9d3ae2199ccdf1417fd3b1f66ce0d962db17875615ee36bb3b5879d8ccbbdec892942f544fa08481b4551a1003
|
data/lib/rbbt/document/corpus/pubmed.rb
CHANGED
@@ -4,10 +4,11 @@ module Document::Corpus
|
|
4
4
|
PUBMED_NAMESPACE="PMID"
|
5
5
|
def add_pmid(pmid, type = nil, update = false)
|
6
6
|
type = :abstract if type.nil?
|
7
|
-
|
7
|
+
|
8
|
+
if ! (update || Array === pmid)
|
8
9
|
id = [PUBMED_NAMESPACE, pmid, type].collect{|e| e.to_s}*":"
|
9
10
|
documents = self.documents(id)
|
10
|
-
return documents if documents.any?
|
11
|
+
return documents.first if documents.any?
|
11
12
|
end
|
12
13
|
|
13
14
|
pmids = Array === pmid ? pmid : [pmid]
|
@@ -27,7 +28,14 @@ module Document::Corpus
|
|
27
28
|
document
|
28
29
|
end
|
29
30
|
|
30
|
-
|
31
|
+
if Array === pmid
|
32
|
+
corpus = res.first.corpus if res.first
|
33
|
+
Document.setup(res, :corpus => corpus)
|
34
|
+
else
|
35
|
+
res = res.first
|
36
|
+
end
|
37
|
+
|
38
|
+
res
|
31
39
|
end
|
32
40
|
|
33
41
|
def add_pubmed_query(query, max = 3000, type = nil)
|
@@ -35,8 +43,8 @@ module Document::Corpus
|
|
35
43
|
add_pmid(pmids, type)
|
36
44
|
end
|
37
45
|
|
38
|
-
self.claim "PMID" do |id,type|
|
46
|
+
self.claim "PMID" do |id,type,update|
|
39
47
|
Log.debug "Claiming #{id}"
|
40
|
-
self.add_pmid(id, type)
|
48
|
+
self.add_pmid(id, type,update)
|
41
49
|
end
|
42
50
|
end
|
data/lib/rbbt/nlp/spaCy.rb
CHANGED
@@ -153,8 +153,8 @@ module SpaCy
|
|
153
153
|
|
154
154
|
chunk_index = Segment.index(SpaCy.chunk_segments(text, lang))
|
155
155
|
|
156
|
-
source_id = chunk_index[source.offset].first || source.segid
|
157
|
-
target_id = chunk_index[target.offset].first || target.segid
|
156
|
+
source_id = chunk_index[source.offset.to_i].first || source.segid
|
157
|
+
target_id = chunk_index[target.offset.to_i].first || target.segid
|
158
158
|
|
159
159
|
path = Paths.dijkstra(graph, source_id, [target_id])
|
160
160
|
|
data/test/rbbt/document/corpus/test_pubmed.rb
CHANGED
@@ -7,7 +7,7 @@ class TestCorpusPubmed < Test::Unit::TestCase
|
|
7
7
|
def test_add_pmid
|
8
8
|
corpus = Document::Corpus.setup({})
|
9
9
|
|
10
|
-
document = corpus.add_pmid("33359141", :abstract)
|
10
|
+
document = corpus.add_pmid("33359141", :abstract, true)
|
11
11
|
title = document.to(:title)
|
12
12
|
assert title.include?("COVID-19")
|
13
13
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.7
|
4
|
+
version: 1.3.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-11-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|