rbbt 1.2.2 → 1.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/install_scripts/classifier/Rakefile +12 -17
- data/install_scripts/norm/config/cue_default.rb +1 -1
- data/install_scripts/norm/config/tokens_default.rb +7 -0
- data/install_scripts/organisms/Ath.Rakefile +1 -1
- data/install_scripts/organisms/Hsa.Rakefile +0 -1
- data/install_scripts/organisms/Sce.Rakefile +1 -1
- data/install_scripts/organisms/rake-include.rb +5 -7
- data/lib/rbbt/bow/bow.rb +1 -1
- data/lib/rbbt/ner/dictionaryNER.rb +1 -1
- data/lib/rbbt/ner/rnorm/tokens.rb +5 -1
- data/lib/rbbt/sources/organism.rb +2 -2
- data/lib/rbbt/sources/pubmed.rb +38 -8
- data/lib/rbbt/util/arrayHash.rb +57 -46
- data/lib/rbbt/util/index.rb +1 -1
- data/lib/rbbt/util/open.rb +7 -4
- data/test/rbbt/sources/test_pubmed.rb +11 -1
- data/test/rbbt/util/test_arrayHash.rb +1 -1
- metadata +38 -4
data/install_scripts/classifier/Rakefile CHANGED
@@ -9,12 +9,12 @@ require 'rbbt/util/misc'
 require 'progress-monitor'
 require 'rand'
 
-$hi
-$low
-$max
-$bigrams
+$hi ||= ENV['hi'] || 0.8
+$low ||= ENV['low'] || 0.01
+$max ||= ENV['max'] || 3000
+$bigrams ||= ENV['bigrams'] == 'true'
 
-$ndocs
+$ndocs ||= ENV['ndocs'] || 5000
 
 desc "Bilds Dictionary and Features for an organism"
 rule(/data\/(.*)/) do |t|
@@ -34,7 +34,7 @@ rule(/data\/(.*)/) do |t|
 
 
 chunks = all.chunk(50)
-Progress.monitor("Building Dictionary for #{ org }: -"
+Progress.monitor("Building Dictionary for #{ org }: -")
 chunks.each{|chunk|
 PubMed.get_article(chunk).each{|pmid, article|
 words = BagOfWords.terms(article.text,$bigrams)
@@ -43,7 +43,7 @@ rule(/data\/(.*)/) do |t|
 }
 
 chunks = go.chunk(50)
-Progress.monitor("Building Dictionary for #{ org }: +"
+Progress.monitor("Building Dictionary for #{ org }: +")
 chunks.each{|chunk|
 PubMed.get_article(chunk).each{|pmid, article|
 words = BagOfWords.terms(article.text,$bigrams)
@@ -59,7 +59,7 @@ rule(/data\/(.*)/) do |t|
 fout = File.open(t.name, 'w')
 fout.puts((['Name','Class'] + terms).join("\t"))
 
-Progress.monitor("Building Features for #{ org }"
+Progress.monitor("Building Features for #{ org }")
 all.each{|pmid|
 text = PubMed.get_article(pmid).text
 fout.puts(([pmid, :-] + BagOfWords.features(text, terms)).join("\t"))
@@ -83,14 +83,9 @@ rule (/results\/(.*)/) => lambda{|n| n.sub(/results/,'model')} do |t|
 features = t.name.sub(/results/,'data')
 org = File.basename(t.name)
 
-ndocs =
+ndocs = 1000
 
-used = []
-if "".respond_to? :collect
-used = Open.read(features).collect{|l| l.chomp.split(/\t/).first}[1..-1]
-else
-used = Open.read(features).lines.collect{|l| l.chomp.split(/\t/).first}[1..-1]
-end
+used = Open.read(features).read.split(/\n/).collect{|l| l.chomp.split(/\t/).first}[1..-1]
 
 classifier = Classifier.new(model)
 go = Organism.gene_literature_go(org).collect{|gene, pmids| pmids}.flatten.uniq - used
@@ -104,12 +99,12 @@ rule (/results\/(.*)/) => lambda{|n| n.sub(/results/,'model')} do |t|
 raise "Not enogh unused articles to evaluate" if go.empty? || all.empty?
 
 features_go = PubMed.get_article(go).collect{|pmid, article|
-article
+article.text
 }
 pos = classifier.classify(features_go).select{|v| v == '+'}.length
 
 features_all = PubMed.get_article(all).collect{|pmid, article|
-article
+article.text
 }
 neg = classifier.classify(features_all).select{|v| v == '-'}.length
 
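Note: the classifier's tuning globals are now seeded from the environment, so they can be set per rake invocation instead of by editing the Rakefile. One caveat with this idiom: values arriving through ENV are Strings, only the literal fallbacks carry a type. A minimal sketch of the pattern, with a hypothetical cutoff variable:

    $cutoff ||= ENV['cutoff'] || 0.5       # ENV wins when set, but then $cutoff is a String like "0.9"
    $bigrams ||= ENV['bigrams'] == 'true'  # booleans need an explicit comparison, as in the Rakefile

    # hypothetical invocation: rake data/Sce cutoff=0.9 bigrams=true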
data/install_scripts/norm/config/cue_default.rb CHANGED
@@ -1,6 +1,6 @@
 equal do |w| [w] end
 standard do |w| [w.downcase.split(/\s+/).sort.join("")] end
-cleaned do |w| [w.downcase.sub(/,.*/,'').sub(/\(.*\)/,'')] end
+cleaned do |w| [w.downcase.sub(/,.*/,'').sub(/\(.*\)/,'').gsub(/s(?:=\W)/,'')] end
 special do |w| s = w.split.select{|w| w.is_special?}.collect{|w| w.downcase.sub(/p$/,'')} end
 words do |w|
 w.sub(/(.*)I$/,'\1I \1').
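Note: the cleaned cue now additionally applies gsub(/s(?:=\W)/,''). A quick irb check with an illustrative synonym string (the name is made up):

    w = "Protein kinase C, alpha (human)"
    w.downcase.sub(/,.*/,'').sub(/\(.*\)/,'').gsub(/s(?:=\W)/,'')
    # => "protein kinase c"

As written, s(?:=\W) matches a literal "s=" followed by a non-word character; if a plural-stripping lookahead was intended it would be spelled s(?=\W).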
data/install_scripts/norm/config/tokens_default.rb CHANGED
@@ -1,4 +1,8 @@
 require 'rbbt/util/misc'
+
+
+plural = Proc.new do |t| t.sub(/s$/,'') end
+
 tokens do
 
 # Some (possible) single letters first
@@ -70,6 +74,9 @@ comparisons do
 miss.special -3
 extr.special -3
 
+transform.receptor plural
+transform.protein plural
+
 transform.roman do |t| [t.arabic, :number] end
 transform.greek_letter do |t| [$inverse_greek[t.downcase], :greek] end
 transform.ase do |t| [t, :special] end
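Note: the new plural Proc backs the receptor and protein token transforms registered in the comparisons block; in isolation it just drops a trailing "s":

    plural = Proc.new do |t| t.sub(/s$/,'') end
    plural.call("receptors")  # => "receptor"
    plural.call("proteins")   # => "protein"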
data/install_scripts/organisms/Hsa.Rakefile CHANGED
@@ -37,7 +37,6 @@ $identifiers = {
 [ 'Protein ID', "protein_id" ],
 [ 'RefSeq Protein ID', "refseq_peptide" ],
 [ 'Unigene ID', "unigene" ],
-[ 'UniProt/SwissProt ID', "uniprot_swissprot" ],
 [ 'UniProt/SwissProt Accession', "uniprot_swissprot_accession" ],
 [ 'HGNC ID', "hgnc_id", 'HGNC'],
 ['EMBL (Genbank) ID' , "embl"] ,
data/install_scripts/organisms/rake-include.rb CHANGED
@@ -107,18 +107,18 @@ file 'identifiers' do
 if $entrez2native
 translations = {}
 Entrez.entrez2native(*$entrez2native.values_at(:tax,:native,:fix,:check)).
-each{|k,v|
-
-}
+each{|k,v| translations[k] = [v.join("|")] }
+
 if translations.keys.any?
 translations_data = ArrayHash.new(translations,'Entrez Gene ID', [$native_id])
 if data
-data.merge(translations_data)
+data.merge(translations_data, $native_id)
 else
 data = translations_data
 end
+else
+puts "No translations from Entrez to #{ $native_id }"
 end
-
 end
 
 
@@ -174,8 +174,6 @@ file 'identifiers' do
 end
 end
 
-
-
 # Write ids to file
 fout = File.open('identifiers', 'w')
 fout.puts "##{$native_id}\t" + data.fields.join("\t")
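Note: the each block now actually populates translations, joining multiple native ids per Entrez id with "|" into a single-element array (the shape ArrayHash.new expects). A sketch with hypothetical ids:

    translations = {}
    { "856002" => ["YPL199C"], "851262" => ["YBL021C", "HAP3"] }.each{|k,v| translations[k] = [v.join("|")] }
    translations  # => {"856002"=>["YPL199C"], "851262"=>["YBL021C|HAP3"]}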
data/lib/rbbt/bow/bow.rb CHANGED
@@ -17,7 +17,7 @@ module BagOfWords
 # 'rbbt/util/misc'.
 def self.words(text)
 return [] if text.nil?
-raise "Stopword list not loaded. Have you installed the wordlists? (rbbt_config
+raise "Stopword list not loaded. Have you installed the wordlists? (rbbt_config prepare wordlists)" if $stopwords.nil?
 text.scan(/\w+/).
 collect{|word| word.downcase.stem}.
 select{|word|
data/lib/rbbt/sources/organism.rb CHANGED
@@ -72,7 +72,7 @@ module Organism
 def self.norm(org, to_entrez = nil)
 require 'rbbt/ner/rnorm'
 if to_entrez.nil?
-to_entrez = id_index(org, :native => 'Entrez Gene
+to_entrez = id_index(org, :native => 'Entrez Gene Id', :other => [supported_ids(org).first])
 end
 
 token_file = File.join(Rbbt.datadir, 'norm','config',org.to_s + '.config')
@@ -216,7 +216,7 @@ module Organism
 first = nil
 if native
 first = id_position(supported,native,options)
-raise "No match for native format '#{ native }'"
+raise "No match for native format '#{ native }'" if first.nil?
 else
 first = 0
 end
data/lib/rbbt/sources/pubmed.rb CHANGED
@@ -24,7 +24,7 @@ module PubMed
 
 @@last = Time.now
 
-articles = xml.scan(/(<PubmedArticle>.*?<\/PubmedArticle>)/
+articles = xml.scan(/(<PubmedArticle>.*?<\/PubmedArticle>)/smu).flatten
 
 if pmids.is_a? Array
 list = {}
@@ -53,6 +53,7 @@ module PubMed
 [:volume , "Journal/JournalIssue/Volume"],
 [:issn , "Journal/ISSN"],
 [:year , "Journal/JournalIssue/PubDate/Year"],
+[:month , "Journal/JournalIssue/PubDate/Month"],
 [:pages , "Pagination/MedlinePgn"],
 [:abstract , "Abstract/AbstractText"],
 ]
@@ -63,6 +64,15 @@ module PubMed
 title.gsub(/(\w*[A-Z][A-Z]+\w*)/, '{\1}')
 end
 
+def self.make_bibentry(lastname, year, title)
+words = title.downcase.scan(/\w+/)
+if words.first.length > 3
+abrev = words.first
+else
+abrev = words[0..2].collect{|w| w.chars.first} * ""
+end
+[lastname.gsub(/\s/,'_'), year || "NOYEAR", abrev] * ""
+end
 def self.parse_xml(xml)
 parser = LibXML::XML::Parser.string(xml)
 pubmed = parser.parse.find("/PubmedArticle").first
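Note: the extracted make_bibentry derives the BibTeX key from last name, year and a title abbreviation: the first title word when it is longer than three characters, otherwise the initials of the first three words. The new assertions in test_pubmed.rb below exercise exactly these two branches:

    PubMed::Article.make_bibentry 'vazquez', 2008, "SENT: Semantic features in text"
    # => "vazquez2008sent"
    PubMed::Article.make_bibentry 'vazquez', 2008, "An Example System"
    # => "vazquez2008aes"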
@@ -84,17 +94,20 @@ module PubMed
 
 bibentry = nil
 info[:author] = article.find("AuthorList/Author").collect do |author|
-
-
-
-
-
+begin
+lastname = author.find("LastName").first.content
+if author.find("ForeName").first.nil?
+forename = nil
+else
+forename = author.find("ForeName").first.content.split(/\s/).collect{|word| if word.length == 1; then word + '.'; else word; end} * " "
+end
+bibentry ||= make_bibentry lastname, info[:year], info[:title]
+rescue
 end
-bibentry ||= [lastname, (info[:year] || "NOYEAR"), info[:title].scan(/\w+/)[0]] * ""
 [lastname, forename] * ", "
 end * " and "
 
-info[:bibentry] = bibentry.downcase
+info[:bibentry] = bibentry.downcase if bibentry
 
 info[:pmc_pdf] = pubmed.find("PubmedData/ArticleIdList/ArticleId").select{|id| id[:IdType] == "pmc"}.first
 
@@ -122,6 +135,23 @@ module PubMed
 @gscholar_pdf ||= GoogleScholar::full_text_url title
 end
 
+def full_text
+return nil if pdf_url.nil?
+
+text = nil
+TmpFile.with_file do |pdf|
+
+# Change user-agent, oh well...
+`wget --user-agent=firefox #{ pdf_url } -O #{ pdf }`
+TmpFile.with_file do |txt|
+`pdftotext #{ pdf } #{ txt }`
+text = Open.read(txt) if File.exists? txt
+end
+end
+
+text
+end
+
 def bibtex
 keys = [:author] + XML_KEYS.collect{|p| p.first } - [:bibentry]
 bibtex = "@article{#{bibentry},\n"
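Note: the new Article#full_text shells out to the wget and pdftotext binaries, so both must be on the PATH; it returns nil when no PDF URL is available, and the downloaded PDF only lives as long as the TmpFile blocks. Usage as exercised by the new test below:

    PubMed.get_article('16438716').full_text  # => the article text extracted from its PDF, or nil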
data/lib/rbbt/util/arrayHash.rb CHANGED
@@ -1,6 +1,20 @@
 
 class ArrayHash
 
+def self.make_case_insensitive(hash)
+new = {}
+hash.each{|k,v|
+new[k.to_s.downcase] = v
+}
+
+class << new; self; end.instance_eval{
+alias_method :old_get, :[]
+define_method(:[], proc{|key| old_get(key.to_s.downcase)})
+}
+
+new
+end
+
 # Take two strings of elements separated by the character sep_char and join them
 # into one, removing repetitions.
 def self.merge_values_string(list1, list2, sep_char ='|')
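Note: make_case_insensitive extracts the singleton-class trick that was previously inlined in merge: keys are stored downcased and the [] reader downcases every lookup. A minimal usage sketch:

    h = ArrayHash.make_case_insensitive('GeneA' => %w(g1 g2))
    h['GENEA']  # => ["g1", "g2"]
    h['genea']  # => ["g1", "g2"]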
@@ -33,8 +47,8 @@ class ArrayHash
 
 
 # Take an hash of arrays and a position and use the value at that position
-# of the arrays
-# key prepended to the arrays. The options hash
+# of the arrays to build a new hash with that value as key, and the original
+# key prepended to the arrays. The options hash accepts the following keys
 # :case_insensitive, which defaults to true, and :index, which indicates that
 # the original key should be the value of the hash entry, instead of the
 # complete array of values.
@@ -60,69 +74,63 @@ class ArrayHash
 }
 }
 
-if case_insensitive
-class << new; self; end.instance_eval{
-alias_method :old_get, :[]
-define_method(:[], proc{|key| old_get(key.to_s.downcase)})
-}
-end
+new = make_case_insensitive new if case_insensitive
 
 new
 end
 
-# Merge
+# Merge one hash of arrays into another. Each hash contains a number of fields for each
 # entry. The pos1 and pos2 indicate what fields should be used to match
 # entries, the values for pos1 and pos2 can be an integer indicating the
 # position in the array or the symbol :main to refer to the key of the hash.
 # The options hash accepts the key :case_insensitive, which defaults to true.
 def self.merge(hash1, hash2, pos1 = :main, pos2 = :main, options = {})
-
 case_insensitive = options[:case_insensitive]; case_insensitive = true if case_insensitive.nil?
+
+raise "Key #{ pos1 } should be an Interger or :main" unless Fixnum === pos1 || pos1.to_s.downcase == 'main'
+raise "Key #{ pos2 } should be an Interger or :main" unless Fixnum === pos2 || pos2.to_s.downcase == 'main'
+
+
+# Pullout if pos2 is not :main
+hash2 = pullout(hash2, pos2) unless pos2.to_s.downcase == 'main'
+
+# Translate if pos1 is not :main
 if pos1.to_s.downcase != 'main'
-
-elsif options[:case_insensitive]
+index = pullout(hash1, pos1, options.merge(:index => true))
 new = {}
-
-
-
-
-
-define_method(:[], proc{|key| old_get(key.to_s.downcase)})
-}
-hash1 = new
+hash2.each do |key, list|
+next unless index[key]
+new[index[key]] = list
+end
+hash2 = new
 end
 
+# Get the lengths of the arrays on each hash (they should
+# be the same for every entry)
 length1 = hash1.values.first.length
 length2 = hash2.values.first.length
+
+if case_insensitive
+hash1 = make_case_insensitive hash1
+hash2 = make_case_insensitive hash2
+end
 
 new = {}
-hash2.each
-
-
-k = key
-v = values
-when Fixnum === pos2
-k = values[pos2]
-v = values
-v.delete_at(pos2)
-v.unshift(key)
+(hash1.keys + hash2.keys).uniq.each do |key|
+if hash2[key].nil?
+list2 = [''] * length2
 else
-
+list2 = hash2[key]
 end
 
-
-
-
-
-new[c] = hash1[c] || [''] * length1
-new[c] += v
-}
+if hash1[key].nil?
+list1 = [''] * length1
+else
+list1 = hash1[key]
 end
-}
 
-
-
-}
+new[key] = list1 + list2
+end
 
 new
 end
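Note: with both positions left at :main, the rewritten merge walks the union of keys and pads the missing side with empty strings instead of dropping the entry. A sketch of the semantics with hypothetical data:

    h1 = { 'g1' => ['a'], 'g2' => ['b'] }
    h2 = { 'g1' => ['x'] }
    ArrayHash.merge(h1, h2)
    # => { 'g1' => ['a', 'x'], 'g2' => ['b', ''] }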
@@ -139,7 +147,7 @@ class ArrayHash
 new
 end
 
-# Clean structure for repeated values. If the same value
+# Clean structure for repeated values. If the same value appears two times
 # eliminate the one that appears latter on the values list (columns of the
 # ArrayHash are assumed to be sorted for importance) if the appear on the
 # same position, remove the one with the smaller vale of the code after
@@ -188,7 +196,7 @@ class ArrayHash
 @data = hash
 @main = main.to_s
 
-if fields.nil?
+if fields.nil? || fields.empty?
 l = hash.values.first.length
 fields = []
 l.times{|i| fields << "F#{i}"}
@@ -207,10 +215,10 @@ class ArrayHash
 # Returns the position of a given field in the value arrays
 def field_pos(field)
 return :main if field == :main
-if field.downcase == self.main.downcase
+if field.to_s.downcase == self.main.to_s.downcase
 return :main
 else
-@fields.collect{|f| f.downcase}.index(field.to_s.downcase)
+@fields.collect{|f| f.downcase }.index(field.to_s.downcase)
 end
 end
 
@@ -222,6 +230,9 @@ class ArrayHash
 pos1 = self.field_pos(field)
 pos2 = other.field_pos(field)
 
+raise "Field #{ field } not found in target hash" if pos1.nil?
+raise "Field #{ field } not found in added hash" if pos2.nil?
+
 new = ArrayHash.merge(self.data, other.data, pos1, pos2, options)
 @data = new
 if pos2 == :main
data/lib/rbbt/util/index.rb CHANGED
@@ -8,7 +8,7 @@ module Index
 # where each element points to the first element in the row. +lexicon+
 # is the file containing the data.
 def self.index(lexicon, options = {})
-options = {:sep => "\t
+options = {:sep => "\t", :sep2 => '\|', :case_sensitive => true}.merge(options)
 
 
 data = Open.to_hash(lexicon, options)
data/lib/rbbt/util/open.rb CHANGED
@@ -17,10 +17,13 @@ module Open
 end
 
 def self.fields(line, sep = "\t")
-
+line << sep
+line << "PLACEHOLDER"
+chunks = line.split(/(#{sep})/).select{|c| c !~ /^#{sep}$/ }
 if line =~ /#{sep}$/
 chunks << ""
 end
+chunks.pop
 chunks
 end
 
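Note: the placeholder appended by fields preserves trailing empty columns, which a bare String#split silently drops. The method now also mutates its argument via <<; the l.chomp at the to_hash call site below both strips the newline from the last column and shields the original line from that mutation. For example:

    Open.fields("a\tb\t\t")   # => ["a", "b", "", ""]
    "a\tb\t\t".split(/\t/)    # => ["a", "b"]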
@@ -101,7 +104,7 @@ module Open
 
 wait(options[:nice]) if options[:nice]
 tmp = TmpFile.tmp_file("open-")
-`wget -O #{tmp} '#{url}' #{options[:quiet] ? '-q' : '' }`
+`wget --user-agent=firefox -O #{tmp} '#{url}' #{options[:quiet] ? '-q' : '' }`
 
 if $?.success?
 if gziped(url)
@@ -197,7 +200,7 @@ module Open
 sep2 = options[:sep2] || "|"
 single = options[:single]
 single = false if single.nil?
-flatten = options[:flatten]
+flatten = options[:flatten]
 flatten = single if flatten.nil?
 
 extra = [extra] if extra && ! extra.is_a?( Array)
@@ -214,7 +217,7 @@ module Open
 next if exclude and exclude.call(l)
 next if select and ! select.call(l)
 
-row_fields = self.fields(l, sep)
+row_fields = self.fields(l.chomp, sep)
 id = row_fields[native]
 next if id.nil? || id == ""
 
data/test/rbbt/sources/test_pubmed.rb CHANGED
@@ -19,11 +19,21 @@ class TestPubMed < Test::Unit::TestCase
 pmids = ['16438716', 17204154]
 assert(PubMed.get_article(pmids)[pmid].title == "Discovering semantic features in the literature: a foundation for building functional associations.")
 end
-
+
+def test_full_text
+pmid = '16438716'
+assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
+end
+
 def test_query
 assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
 end
 
+def test_bibentry
+assert("vazquez2008sent", PubMed::Article.make_bibentry('vazquez', 2008, "SENT: Semantic features in text"))
+assert("vazquez2008aes", PubMed::Article.make_bibentry('vazquez', 2008, "An Example System"))
+end
+
 end
 
 
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
 segments:
 - 1
 - 2
--
-version: 1.2.
+- 5
+version: 1.2.5
 platform: ruby
 authors:
 - Miguel Vazquez
@@ -14,13 +14,14 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date: 2010-
+date: 2010-10-22 00:00:00 +02:00
 default_executable: rbbt_config
 dependencies:
 - !ruby/object:Gem::Dependency
 name: rake
 prerelease: false
 requirement: &id001 !ruby/object:Gem::Requirement
+none: false
 requirements:
 - - ">="
 - !ruby/object:Gem::Version
@@ -35,6 +36,7 @@ dependencies:
 name: simpleconsole
 prerelease: false
 requirement: &id002 !ruby/object:Gem::Requirement
+none: false
 requirements:
 - - ">="
 - !ruby/object:Gem::Version
@@ -47,6 +49,7 @@ dependencies:
 name: stemmer
 prerelease: false
 requirement: &id003 !ruby/object:Gem::Requirement
+none: false
 requirements:
 - - ">="
 - !ruby/object:Gem::Version
@@ -59,6 +62,7 @@ dependencies:
 name: progress-monitor
 prerelease: false
 requirement: &id004 !ruby/object:Gem::Requirement
+none: false
 requirements:
 - - ">="
 - !ruby/object:Gem::Version
@@ -71,6 +75,7 @@ dependencies:
 name: simpleconsole
 prerelease: false
 requirement: &id005 !ruby/object:Gem::Requirement
+none: false
 requirements:
 - - ">="
 - !ruby/object:Gem::Version
@@ -150,6 +155,33 @@ files:
 - tasks/install.rake
 - LICENSE
 - README.rdoc
+- test/test_rbbt.rb
+- test/rbbt/bow/test_bow.rb
+- test/rbbt/bow/test_classifier.rb
+- test/rbbt/bow/test_dictionary.rb
+- test/rbbt/sources/test_organism.rb
+- test/rbbt/sources/test_biomart.rb
+- test/rbbt/sources/test_pubmed.rb
+- test/rbbt/sources/test_polysearch.rb
+- test/rbbt/sources/test_biocreative.rb
+- test/rbbt/sources/test_entrez.rb
+- test/rbbt/sources/test_go.rb
+- test/rbbt/ner/test_rner.rb
+- test/rbbt/ner/test_dictionaryNER.rb
+- test/rbbt/ner/test_banner.rb
+- test/rbbt/ner/test_rnorm.rb
+- test/rbbt/ner/test_regexpNER.rb
+- test/rbbt/ner/test_abner.rb
+- test/rbbt/ner/rnorm/test_cue_index.rb
+- test/rbbt/ner/rnorm/test_tokens.rb
+- test/rbbt/util/test_misc.rb
+- test/rbbt/util/test_tmpfile.rb
+- test/rbbt/util/test_arrayHash.rb
+- test/rbbt/util/test_filecache.rb
+- test/rbbt/util/test_simpleDSL.rb
+- test/rbbt/util/test_open.rb
+- test/rbbt/util/test_index.rb
+- test/test_helper.rb
 has_rdoc: true
 homepage: http://github.com/mikisvaz/rbbt
 licenses: []
@@ -160,6 +192,7 @@ rdoc_options:
 require_paths:
 - lib
 required_ruby_version: !ruby/object:Gem::Requirement
+none: false
 requirements:
 - - ">="
 - !ruby/object:Gem::Version
@@ -167,6 +200,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
 - 0
 version: "0"
 required_rubygems_version: !ruby/object:Gem::Requirement
+none: false
 requirements:
 - - ">="
 - !ruby/object:Gem::Version
@@ -176,7 +210,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 requirements: []
 
 rubyforge_project:
-rubygems_version: 1.3.
+rubygems_version: 1.3.7
 signing_key:
 specification_version: 3
 summary: Bioinformatics and text mining toolbox