rbbt-text 1.3.10 → 1.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/bow/misc.rb +1 -1
- data/lib/rbbt/document/corpus/pubmed.rb +1 -1
- metadata +27 -27
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 587b7971fd0f8fec2602323ea480521445ca417c3a407d057cf95df1f3a36216
|
|
4
|
+
data.tar.gz: 4f66306f80e838a0a27299705d79e5856b38dd936005d2b18004539bbb192431
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4aa191aa4e5cb5e3f7d4a49b30beb1eb7259a34074a7521a10b9951cd1cc7a097a06ba6a97d9f4f4e100b2058de3f94f5199cb069a030f93a3f69bf1ecec09ff
|
|
7
|
+
data.tar.gz: f3d5eb11d12f8a9d951d1073abd7e6cb5ace99bd075e7dc897f0aa715ae1552271019b4eb6849a172529d830f30bbc09ce40dd351fd81f9c06f338b075523e36
|
data/lib/rbbt/bow/misc.rb
CHANGED
|
@@ -37,7 +37,7 @@ $greek.each{|l,s| $inverse_greek[s] = l }
|
|
|
37
37
|
|
|
38
38
|
class String
|
|
39
39
|
CONSONANTS = []
|
|
40
|
-
if File.
|
|
40
|
+
if File.exist? File.join(Rbbt.datadir, 'wordlists/consonants')
|
|
41
41
|
Object::Open.read(File.join(Rbbt.datadir, 'wordlists/consonants')).each_line{|l| CONSONANTS << l.chomp}
|
|
42
42
|
end
|
|
43
43
|
|
|
data/lib/rbbt/document/corpus/pubmed.rb
CHANGED
|
@@ -20,7 +20,7 @@ module Document::Corpus
|
|
|
20
20
|
elsif type.to_sym == :title
|
|
21
21
|
Document.setup(article.title || "", PUBMED_NAMESPACE, pmid, type.to_sym, self)
|
|
22
22
|
elsif type.to_sym == :title_and_abstract
|
|
23
|
-
Document.setup((article.title || "") + "
|
|
23
|
+
Document.setup((article.title || "") + " " + (article.abstract || ""), PUBMED_NAMESPACE, pmid, type.to_sym, self)
|
|
24
24
|
else
|
|
25
25
|
raise "No FullText available for #{ pmid }" if article.full_text.nil?
|
|
26
26
|
Document.setup(article.full_text, PUBMED_NAMESPACE, pmid, :fulltext, self, :corpus => self)
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rbbt-text
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.3.10
|
|
4
|
+
version: 1.3.11
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Miguel Vazquez
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-
|
|
11
|
+
date: 2023-04-11 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rbbt-util
|
|
@@ -175,45 +175,45 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
175
175
|
- !ruby/object:Gem::Version
|
|
176
176
|
version: '0'
|
|
177
177
|
requirements: []
|
|
178
|
-
rubygems_version: 3.
|
|
178
|
+
rubygems_version: 3.4.8
|
|
179
179
|
signing_key:
|
|
180
180
|
specification_version: 4
|
|
181
181
|
summary: Text mining tools for the Ruby Bioinformatics Toolkit (rbbt)
|
|
182
182
|
test_files:
|
|
183
|
-
- test/
|
|
184
|
-
- test/
|
|
185
|
-
- test/rbbt/
|
|
183
|
+
- test/rbbt/nlp/test_nlp.rb
|
|
184
|
+
- test/rbbt/nlp/open_nlp/test_sentence_splitter.rb
|
|
185
|
+
- test/rbbt/nlp/genia/test_sentence_splitter.rb
|
|
186
186
|
- test/rbbt/bow/test_bow.rb
|
|
187
187
|
- test/rbbt/bow/test_misc.rb
|
|
188
|
-
- test/rbbt/
|
|
189
|
-
- test/rbbt/
|
|
190
|
-
- test/rbbt/
|
|
191
|
-
- test/rbbt/segment/test_named_entity.rb
|
|
192
|
-
- test/rbbt/segment/test_corpus.rb
|
|
193
|
-
- test/rbbt/segment/test_range_index.rb
|
|
194
|
-
- test/rbbt/segment/test_annotation.rb
|
|
195
|
-
- test/rbbt/entity/test_document.rb
|
|
188
|
+
- test/rbbt/bow/test_dictionary.rb
|
|
189
|
+
- test/rbbt/test_document.rb
|
|
190
|
+
- test/rbbt/document/test_annotation.rb
|
|
196
191
|
- test/rbbt/document/corpus/test_pubmed.rb
|
|
197
192
|
- test/rbbt/document/test_corpus.rb
|
|
198
|
-
- test/rbbt/
|
|
199
|
-
- test/rbbt/test_document.rb
|
|
193
|
+
- test/rbbt/entity/test_document.rb
|
|
200
194
|
- test/rbbt/ner/test_patterns.rb
|
|
195
|
+
- test/rbbt/ner/test_NER.rb
|
|
196
|
+
- test/rbbt/ner/test_abner.rb
|
|
201
197
|
- test/rbbt/ner/rnorm/test_tokens.rb
|
|
198
|
+
- test/rbbt/ner/test_rnorm.rb
|
|
199
|
+
- test/rbbt/ner/test_regexpNER.rb
|
|
202
200
|
- test/rbbt/ner/test_ngram_prefix_dictionary.rb
|
|
203
|
-
- test/rbbt/ner/test_token_trieNER.rb
|
|
204
|
-
- test/rbbt/ner/test_finder.rb
|
|
205
201
|
- test/rbbt/ner/test_brat.rb
|
|
206
|
-
- test/rbbt/ner/test_regexpNER.rb
|
|
207
202
|
- test/rbbt/ner/test_g_norm_plus.rb
|
|
208
|
-
- test/rbbt/ner/test_rnorm.rb
|
|
209
|
-
- test/rbbt/ner/test_linnaeus.rb
|
|
210
203
|
- test/rbbt/ner/test_chemical_tagger.rb
|
|
211
|
-
- test/rbbt/ner/
|
|
212
|
-
- test/rbbt/ner/
|
|
204
|
+
- test/rbbt/ner/test_banner.rb
|
|
205
|
+
- test/rbbt/ner/test_token_trieNER.rb
|
|
206
|
+
- test/rbbt/ner/test_finder.rb
|
|
213
207
|
- test/rbbt/ner/test_rner.rb
|
|
208
|
+
- test/rbbt/ner/test_linnaeus.rb
|
|
214
209
|
- test/rbbt/ner/test_oscar4.rb
|
|
215
|
-
- test/rbbt/ner/test_banner.rb
|
|
216
210
|
- test/rbbt/test_segment.rb
|
|
217
|
-
- test/rbbt/
|
|
218
|
-
- test/rbbt/
|
|
219
|
-
- test/rbbt/
|
|
211
|
+
- test/rbbt/segment/test_transformed.rb
|
|
212
|
+
- test/rbbt/segment/test_overlaps.rb
|
|
213
|
+
- test/rbbt/segment/test_annotation.rb
|
|
214
|
+
- test/rbbt/segment/test_named_entity.rb
|
|
215
|
+
- test/rbbt/segment/test_encoding.rb
|
|
216
|
+
- test/rbbt/segment/test_range_index.rb
|
|
217
|
+
- test/rbbt/segment/test_corpus.rb
|
|
218
|
+
- test/test_spaCy.rb
|
|
219
|
+
- test/test_helper.rb
|