rbbt 1.2.2 → 1.2.5
Sign up to get free protection for your applications and to get access to all the features.
- data/install_scripts/classifier/Rakefile +12 -17
- data/install_scripts/norm/config/cue_default.rb +1 -1
- data/install_scripts/norm/config/tokens_default.rb +7 -0
- data/install_scripts/organisms/Ath.Rakefile +1 -1
- data/install_scripts/organisms/Hsa.Rakefile +0 -1
- data/install_scripts/organisms/Sce.Rakefile +1 -1
- data/install_scripts/organisms/rake-include.rb +5 -7
- data/lib/rbbt/bow/bow.rb +1 -1
- data/lib/rbbt/ner/dictionaryNER.rb +1 -1
- data/lib/rbbt/ner/rnorm/tokens.rb +5 -1
- data/lib/rbbt/sources/organism.rb +2 -2
- data/lib/rbbt/sources/pubmed.rb +38 -8
- data/lib/rbbt/util/arrayHash.rb +57 -46
- data/lib/rbbt/util/index.rb +1 -1
- data/lib/rbbt/util/open.rb +7 -4
- data/test/rbbt/sources/test_pubmed.rb +11 -1
- data/test/rbbt/util/test_arrayHash.rb +1 -1
- metadata +38 -4
@@ -9,12 +9,12 @@ require 'rbbt/util/misc'
|
|
9
9
|
require 'progress-monitor'
|
10
10
|
require 'rand'
|
11
11
|
|
12
|
-
$hi
|
13
|
-
$low
|
14
|
-
$max
|
15
|
-
$bigrams
|
12
|
+
$hi ||= ENV['hi'] || 0.8
|
13
|
+
$low ||= ENV['low'] || 0.01
|
14
|
+
$max ||= ENV['max'] || 3000
|
15
|
+
$bigrams ||= ENV['bigrams'] == 'true'
|
16
16
|
|
17
|
-
$ndocs
|
17
|
+
$ndocs ||= ENV['ndocs'] || 5000
|
18
18
|
|
19
19
|
desc "Bilds Dictionary and Features for an organism"
|
20
20
|
rule(/data\/(.*)/) do |t|
|
@@ -34,7 +34,7 @@ rule(/data\/(.*)/) do |t|
|
|
34
34
|
|
35
35
|
|
36
36
|
chunks = all.chunk(50)
|
37
|
-
Progress.monitor("Building Dictionary for #{ org }: -"
|
37
|
+
Progress.monitor("Building Dictionary for #{ org }: -")
|
38
38
|
chunks.each{|chunk|
|
39
39
|
PubMed.get_article(chunk).each{|pmid, article|
|
40
40
|
words = BagOfWords.terms(article.text,$bigrams)
|
@@ -43,7 +43,7 @@ rule(/data\/(.*)/) do |t|
|
|
43
43
|
}
|
44
44
|
|
45
45
|
chunks = go.chunk(50)
|
46
|
-
Progress.monitor("Building Dictionary for #{ org }: +"
|
46
|
+
Progress.monitor("Building Dictionary for #{ org }: +")
|
47
47
|
chunks.each{|chunk|
|
48
48
|
PubMed.get_article(chunk).each{|pmid, article|
|
49
49
|
words = BagOfWords.terms(article.text,$bigrams)
|
@@ -59,7 +59,7 @@ rule(/data\/(.*)/) do |t|
|
|
59
59
|
fout = File.open(t.name, 'w')
|
60
60
|
fout.puts((['Name','Class'] + terms).join("\t"))
|
61
61
|
|
62
|
-
Progress.monitor("Building Features for #{ org }"
|
62
|
+
Progress.monitor("Building Features for #{ org }")
|
63
63
|
all.each{|pmid|
|
64
64
|
text = PubMed.get_article(pmid).text
|
65
65
|
fout.puts(([pmid, :-] + BagOfWords.features(text, terms)).join("\t"))
|
@@ -83,14 +83,9 @@ rule (/results\/(.*)/) => lambda{|n| n.sub(/results/,'model')} do |t|
|
|
83
83
|
features = t.name.sub(/results/,'data')
|
84
84
|
org = File.basename(t.name)
|
85
85
|
|
86
|
-
ndocs =
|
86
|
+
ndocs = 1000
|
87
87
|
|
88
|
-
used = []
|
89
|
-
if "".respond_to? :collect
|
90
|
-
used = Open.read(features).collect{|l| l.chomp.split(/\t/).first}[1..-1]
|
91
|
-
else
|
92
|
-
used = Open.read(features).lines.collect{|l| l.chomp.split(/\t/).first}[1..-1]
|
93
|
-
end
|
88
|
+
used = Open.read(features).read.split(/\n/).collect{|l| l.chomp.split(/\t/).first}[1..-1]
|
94
89
|
|
95
90
|
classifier = Classifier.new(model)
|
96
91
|
go = Organism.gene_literature_go(org).collect{|gene, pmids| pmids}.flatten.uniq - used
|
@@ -104,12 +99,12 @@ rule (/results\/(.*)/) => lambda{|n| n.sub(/results/,'model')} do |t|
|
|
104
99
|
raise "Not enogh unused articles to evaluate" if go.empty? || all.empty?
|
105
100
|
|
106
101
|
features_go = PubMed.get_article(go).collect{|pmid, article|
|
107
|
-
article
|
102
|
+
article.text
|
108
103
|
}
|
109
104
|
pos = classifier.classify(features_go).select{|v| v == '+'}.length
|
110
105
|
|
111
106
|
features_all = PubMed.get_article(all).collect{|pmid, article|
|
112
|
-
article
|
107
|
+
article.text
|
113
108
|
}
|
114
109
|
neg = classifier.classify(features_all).select{|v| v == '-'}.length
|
115
110
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
equal do |w| [w] end
|
2
2
|
standard do |w| [w.downcase.split(/\s+/).sort.join("")] end
|
3
|
-
cleaned do |w| [w.downcase.sub(/,.*/,'').sub(/\(.*\)/,'')] end
|
3
|
+
cleaned do |w| [w.downcase.sub(/,.*/,'').sub(/\(.*\)/,'').gsub(/s(?:=\W)/,'')] end
|
4
4
|
special do |w| s = w.split.select{|w| w.is_special?}.collect{|w| w.downcase.sub(/p$/,'')} end
|
5
5
|
words do |w|
|
6
6
|
w.sub(/(.*)I$/,'\1I \1').
|
@@ -1,4 +1,8 @@
|
|
1
1
|
require 'rbbt/util/misc'
|
2
|
+
|
3
|
+
|
4
|
+
plural = Proc.new do |t| t.sub(/s$/,'') end
|
5
|
+
|
2
6
|
tokens do
|
3
7
|
|
4
8
|
# Some (possible) single letters first
|
@@ -70,6 +74,9 @@ comparisons do
|
|
70
74
|
miss.special -3
|
71
75
|
extr.special -3
|
72
76
|
|
77
|
+
transform.receptor plural
|
78
|
+
transform.protein plural
|
79
|
+
|
73
80
|
transform.roman do |t| [t.arabic, :number] end
|
74
81
|
transform.greek_letter do |t| [$inverse_greek[t.downcase], :greek] end
|
75
82
|
transform.ase do |t| [t, :special] end
|
@@ -37,7 +37,6 @@ $identifiers = {
|
|
37
37
|
[ 'Protein ID', "protein_id" ],
|
38
38
|
[ 'RefSeq Protein ID', "refseq_peptide" ],
|
39
39
|
[ 'Unigene ID', "unigene" ],
|
40
|
-
[ 'UniProt/SwissProt ID', "uniprot_swissprot" ],
|
41
40
|
[ 'UniProt/SwissProt Accession', "uniprot_swissprot_accession" ],
|
42
41
|
[ 'HGNC ID', "hgnc_id", 'HGNC'],
|
43
42
|
['EMBL (Genbank) ID' , "embl"] ,
|
@@ -107,18 +107,18 @@ file 'identifiers' do
|
|
107
107
|
if $entrez2native
|
108
108
|
translations = {}
|
109
109
|
Entrez.entrez2native(*$entrez2native.values_at(:tax,:native,:fix,:check)).
|
110
|
-
each{|k,v|
|
111
|
-
|
112
|
-
}
|
110
|
+
each{|k,v| translations[k] = [v.join("|")] }
|
111
|
+
|
113
112
|
if translations.keys.any?
|
114
113
|
translations_data = ArrayHash.new(translations,'Entrez Gene ID', [$native_id])
|
115
114
|
if data
|
116
|
-
data.merge(translations_data)
|
115
|
+
data.merge(translations_data, $native_id)
|
117
116
|
else
|
118
117
|
data = translations_data
|
119
118
|
end
|
119
|
+
else
|
120
|
+
puts "No translations from Entrez to #{ $native_id }"
|
120
121
|
end
|
121
|
-
|
122
122
|
end
|
123
123
|
|
124
124
|
|
@@ -174,8 +174,6 @@ file 'identifiers' do
|
|
174
174
|
end
|
175
175
|
end
|
176
176
|
|
177
|
-
|
178
|
-
|
179
177
|
# Write ids to file
|
180
178
|
fout = File.open('identifiers', 'w')
|
181
179
|
fout.puts "##{$native_id}\t" + data.fields.join("\t")
|
data/lib/rbbt/bow/bow.rb
CHANGED
@@ -17,7 +17,7 @@ module BagOfWords
|
|
17
17
|
# 'rbbt/util/misc'.
|
18
18
|
def self.words(text)
|
19
19
|
return [] if text.nil?
|
20
|
-
raise "Stopword list not loaded. Have you installed the wordlists? (rbbt_config
|
20
|
+
raise "Stopword list not loaded. Have you installed the wordlists? (rbbt_config prepare wordlists)" if $stopwords.nil?
|
21
21
|
text.scan(/\w+/).
|
22
22
|
collect{|word| word.downcase.stem}.
|
23
23
|
select{|word|
|
@@ -72,7 +72,7 @@ module Organism
|
|
72
72
|
def self.norm(org, to_entrez = nil)
|
73
73
|
require 'rbbt/ner/rnorm'
|
74
74
|
if to_entrez.nil?
|
75
|
-
to_entrez = id_index(org, :native => 'Entrez Gene
|
75
|
+
to_entrez = id_index(org, :native => 'Entrez Gene Id', :other => [supported_ids(org).first])
|
76
76
|
end
|
77
77
|
|
78
78
|
token_file = File.join(Rbbt.datadir, 'norm','config',org.to_s + '.config')
|
@@ -216,7 +216,7 @@ module Organism
|
|
216
216
|
first = nil
|
217
217
|
if native
|
218
218
|
first = id_position(supported,native,options)
|
219
|
-
raise "No match for native format '#{ native }'"
|
219
|
+
raise "No match for native format '#{ native }'" if first.nil?
|
220
220
|
else
|
221
221
|
first = 0
|
222
222
|
end
|
data/lib/rbbt/sources/pubmed.rb
CHANGED
@@ -24,7 +24,7 @@ module PubMed
|
|
24
24
|
|
25
25
|
@@last = Time.now
|
26
26
|
|
27
|
-
articles = xml.scan(/(<PubmedArticle>.*?<\/PubmedArticle>)/
|
27
|
+
articles = xml.scan(/(<PubmedArticle>.*?<\/PubmedArticle>)/smu).flatten
|
28
28
|
|
29
29
|
if pmids.is_a? Array
|
30
30
|
list = {}
|
@@ -53,6 +53,7 @@ module PubMed
|
|
53
53
|
[:volume , "Journal/JournalIssue/Volume"],
|
54
54
|
[:issn , "Journal/ISSN"],
|
55
55
|
[:year , "Journal/JournalIssue/PubDate/Year"],
|
56
|
+
[:month , "Journal/JournalIssue/PubDate/Month"],
|
56
57
|
[:pages , "Pagination/MedlinePgn"],
|
57
58
|
[:abstract , "Abstract/AbstractText"],
|
58
59
|
]
|
@@ -63,6 +64,15 @@ module PubMed
|
|
63
64
|
title.gsub(/(\w*[A-Z][A-Z]+\w*)/, '{\1}')
|
64
65
|
end
|
65
66
|
|
67
|
+
def self.make_bibentry(lastname, year, title)
|
68
|
+
words = title.downcase.scan(/\w+/)
|
69
|
+
if words.first.length > 3
|
70
|
+
abrev = words.first
|
71
|
+
else
|
72
|
+
abrev = words[0..2].collect{|w| w.chars.first} * ""
|
73
|
+
end
|
74
|
+
[lastname.gsub(/\s/,'_'), year || "NOYEAR", abrev] * ""
|
75
|
+
end
|
66
76
|
def self.parse_xml(xml)
|
67
77
|
parser = LibXML::XML::Parser.string(xml)
|
68
78
|
pubmed = parser.parse.find("/PubmedArticle").first
|
@@ -84,17 +94,20 @@ module PubMed
|
|
84
94
|
|
85
95
|
bibentry = nil
|
86
96
|
info[:author] = article.find("AuthorList/Author").collect do |author|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
97
|
+
begin
|
98
|
+
lastname = author.find("LastName").first.content
|
99
|
+
if author.find("ForeName").first.nil?
|
100
|
+
forename = nil
|
101
|
+
else
|
102
|
+
forename = author.find("ForeName").first.content.split(/\s/).collect{|word| if word.length == 1; then word + '.'; else word; end} * " "
|
103
|
+
end
|
104
|
+
bibentry ||= make_bibentry lastname, info[:year], info[:title]
|
105
|
+
rescue
|
92
106
|
end
|
93
|
-
bibentry ||= [lastname, (info[:year] || "NOYEAR"), info[:title].scan(/\w+/)[0]] * ""
|
94
107
|
[lastname, forename] * ", "
|
95
108
|
end * " and "
|
96
109
|
|
97
|
-
info[:bibentry] = bibentry.downcase
|
110
|
+
info[:bibentry] = bibentry.downcase if bibentry
|
98
111
|
|
99
112
|
info[:pmc_pdf] = pubmed.find("PubmedData/ArticleIdList/ArticleId").select{|id| id[:IdType] == "pmc"}.first
|
100
113
|
|
@@ -122,6 +135,23 @@ module PubMed
|
|
122
135
|
@gscholar_pdf ||= GoogleScholar::full_text_url title
|
123
136
|
end
|
124
137
|
|
138
|
+
def full_text
|
139
|
+
return nil if pdf_url.nil?
|
140
|
+
|
141
|
+
text = nil
|
142
|
+
TmpFile.with_file do |pdf|
|
143
|
+
|
144
|
+
# Change user-agent, oh well...
|
145
|
+
`wget --user-agent=firefox #{ pdf_url } -O #{ pdf }`
|
146
|
+
TmpFile.with_file do |txt|
|
147
|
+
`pdftotext #{ pdf } #{ txt }`
|
148
|
+
text = Open.read(txt) if File.exists? txt
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
152
|
+
text
|
153
|
+
end
|
154
|
+
|
125
155
|
def bibtex
|
126
156
|
keys = [:author] + XML_KEYS.collect{|p| p.first } - [:bibentry]
|
127
157
|
bibtex = "@article{#{bibentry},\n"
|
data/lib/rbbt/util/arrayHash.rb
CHANGED
@@ -1,6 +1,20 @@
|
|
1
1
|
|
2
2
|
class ArrayHash
|
3
3
|
|
4
|
+
def self.make_case_insensitive(hash)
|
5
|
+
new = {}
|
6
|
+
hash.each{|k,v|
|
7
|
+
new[k.to_s.downcase] = v
|
8
|
+
}
|
9
|
+
|
10
|
+
class << new; self; end.instance_eval{
|
11
|
+
alias_method :old_get, :[]
|
12
|
+
define_method(:[], proc{|key| old_get(key.to_s.downcase)})
|
13
|
+
}
|
14
|
+
|
15
|
+
new
|
16
|
+
end
|
17
|
+
|
4
18
|
# Take two strings of elements separated by the character sep_char and join them
|
5
19
|
# into one, removing repetitions.
|
6
20
|
def self.merge_values_string(list1, list2, sep_char ='|')
|
@@ -33,8 +47,8 @@ class ArrayHash
|
|
33
47
|
|
34
48
|
|
35
49
|
# Take an hash of arrays and a position and use the value at that position
|
36
|
-
# of the arrays
|
37
|
-
# key prepended to the arrays. The options hash
|
50
|
+
# of the arrays to build a new hash with that value as key, and the original
|
51
|
+
# key prepended to the arrays. The options hash accepts the following keys
|
38
52
|
# :case_insensitive, which defaults to true, and :index, which indicates that
|
39
53
|
# the original key should be the value of the hash entry, instead of the
|
40
54
|
# complete array of values.
|
@@ -60,69 +74,63 @@ class ArrayHash
|
|
60
74
|
}
|
61
75
|
}
|
62
76
|
|
63
|
-
if case_insensitive
|
64
|
-
class << new; self; end.instance_eval{
|
65
|
-
alias_method :old_get, :[]
|
66
|
-
define_method(:[], proc{|key| old_get(key.to_s.downcase)})
|
67
|
-
}
|
68
|
-
end
|
77
|
+
new = make_case_insensitive new if case_insensitive
|
69
78
|
|
70
79
|
new
|
71
80
|
end
|
72
81
|
|
73
|
-
# Merge
|
82
|
+
# Merge one hash of arrays into another. Each hash contains a number of fields for each
|
74
83
|
# entry. The pos1 and pos2 indicate what fields should be used to match
|
75
84
|
# entries, the values for pos1 and pos2 can be an integer indicating the
|
76
85
|
# position in the array or the symbol :main to refer to the key of the hash.
|
77
86
|
# The options hash accepts the key :case_insensitive, which defaults to true.
|
78
87
|
def self.merge(hash1, hash2, pos1 = :main, pos2 = :main, options = {})
|
79
|
-
|
80
88
|
case_insensitive = options[:case_insensitive]; case_insensitive = true if case_insensitive.nil?
|
89
|
+
|
90
|
+
raise "Key #{ pos1 } should be an Interger or :main" unless Fixnum === pos1 || pos1.to_s.downcase == 'main'
|
91
|
+
raise "Key #{ pos2 } should be an Interger or :main" unless Fixnum === pos2 || pos2.to_s.downcase == 'main'
|
92
|
+
|
93
|
+
|
94
|
+
# Pullout if pos2 is not :main
|
95
|
+
hash2 = pullout(hash2, pos2) unless pos2.to_s.downcase == 'main'
|
96
|
+
|
97
|
+
# Translate if pos1 is not :main
|
81
98
|
if pos1.to_s.downcase != 'main'
|
82
|
-
|
83
|
-
elsif options[:case_insensitive]
|
99
|
+
index = pullout(hash1, pos1, options.merge(:index => true))
|
84
100
|
new = {}
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
define_method(:[], proc{|key| old_get(key.to_s.downcase)})
|
91
|
-
}
|
92
|
-
hash1 = new
|
101
|
+
hash2.each do |key, list|
|
102
|
+
next unless index[key]
|
103
|
+
new[index[key]] = list
|
104
|
+
end
|
105
|
+
hash2 = new
|
93
106
|
end
|
94
107
|
|
108
|
+
# Get the lengths of the arrays on each hash (they should
|
109
|
+
# be the same for every entry)
|
95
110
|
length1 = hash1.values.first.length
|
96
111
|
length2 = hash2.values.first.length
|
112
|
+
|
113
|
+
if case_insensitive
|
114
|
+
hash1 = make_case_insensitive hash1
|
115
|
+
hash2 = make_case_insensitive hash2
|
116
|
+
end
|
97
117
|
|
98
118
|
new = {}
|
99
|
-
hash2.each
|
100
|
-
|
101
|
-
|
102
|
-
k = key
|
103
|
-
v = values
|
104
|
-
when Fixnum === pos2
|
105
|
-
k = values[pos2]
|
106
|
-
v = values
|
107
|
-
v.delete_at(pos2)
|
108
|
-
v.unshift(key)
|
119
|
+
(hash1.keys + hash2.keys).uniq.each do |key|
|
120
|
+
if hash2[key].nil?
|
121
|
+
list2 = [''] * length2
|
109
122
|
else
|
110
|
-
|
123
|
+
list2 = hash2[key]
|
111
124
|
end
|
112
125
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
new[c] = hash1[c] || [''] * length1
|
118
|
-
new[c] += v
|
119
|
-
}
|
126
|
+
if hash1[key].nil?
|
127
|
+
list1 = [''] * length1
|
128
|
+
else
|
129
|
+
list1 = hash1[key]
|
120
130
|
end
|
121
|
-
}
|
122
131
|
|
123
|
-
|
124
|
-
|
125
|
-
}
|
132
|
+
new[key] = list1 + list2
|
133
|
+
end
|
126
134
|
|
127
135
|
new
|
128
136
|
end
|
@@ -139,7 +147,7 @@ class ArrayHash
|
|
139
147
|
new
|
140
148
|
end
|
141
149
|
|
142
|
-
# Clean structure for repeated values. If the same value
|
150
|
+
# Clean structure for repeated values. If the same value appears two times
|
143
151
|
# eliminate the one that appears latter on the values list (columns of the
|
144
152
|
# ArrayHash are assumed to be sorted for importance) if the appear on the
|
145
153
|
# same position, remove the one with the smaller vale of the code after
|
@@ -188,7 +196,7 @@ class ArrayHash
|
|
188
196
|
@data = hash
|
189
197
|
@main = main.to_s
|
190
198
|
|
191
|
-
if fields.nil?
|
199
|
+
if fields.nil? || fields.empty?
|
192
200
|
l = hash.values.first.length
|
193
201
|
fields = []
|
194
202
|
l.times{|i| fields << "F#{i}"}
|
@@ -207,10 +215,10 @@ class ArrayHash
|
|
207
215
|
# Returns the position of a given field in the value arrays
|
208
216
|
def field_pos(field)
|
209
217
|
return :main if field == :main
|
210
|
-
if field.downcase == self.main.downcase
|
218
|
+
if field.to_s.downcase == self.main.to_s.downcase
|
211
219
|
return :main
|
212
220
|
else
|
213
|
-
@fields.collect{|f| f.downcase}.index(field.to_s.downcase)
|
221
|
+
@fields.collect{|f| f.downcase }.index(field.to_s.downcase)
|
214
222
|
end
|
215
223
|
end
|
216
224
|
|
@@ -222,6 +230,9 @@ class ArrayHash
|
|
222
230
|
pos1 = self.field_pos(field)
|
223
231
|
pos2 = other.field_pos(field)
|
224
232
|
|
233
|
+
raise "Field #{ field } not found in target hash" if pos1.nil?
|
234
|
+
raise "Field #{ field } not found in added hash" if pos2.nil?
|
235
|
+
|
225
236
|
new = ArrayHash.merge(self.data, other.data, pos1, pos2, options)
|
226
237
|
@data = new
|
227
238
|
if pos2 == :main
|
data/lib/rbbt/util/index.rb
CHANGED
@@ -8,7 +8,7 @@ module Index
|
|
8
8
|
# where each element points to the first element in the row. +lexicon+
|
9
9
|
# is the file containing the data.
|
10
10
|
def self.index(lexicon, options = {})
|
11
|
-
options = {:sep => "\t
|
11
|
+
options = {:sep => "\t", :sep2 => '\|', :case_sensitive => true}.merge(options)
|
12
12
|
|
13
13
|
|
14
14
|
data = Open.to_hash(lexicon, options)
|
data/lib/rbbt/util/open.rb
CHANGED
@@ -17,10 +17,13 @@ module Open
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def self.fields(line, sep = "\t")
|
20
|
-
|
20
|
+
line << sep
|
21
|
+
line << "PLACEHOLDER"
|
22
|
+
chunks = line.split(/(#{sep})/).select{|c| c !~ /^#{sep}$/ }
|
21
23
|
if line =~ /#{sep}$/
|
22
24
|
chunks << ""
|
23
25
|
end
|
26
|
+
chunks.pop
|
24
27
|
chunks
|
25
28
|
end
|
26
29
|
|
@@ -101,7 +104,7 @@ module Open
|
|
101
104
|
|
102
105
|
wait(options[:nice]) if options[:nice]
|
103
106
|
tmp = TmpFile.tmp_file("open-")
|
104
|
-
`wget -O #{tmp} '#{url}' #{options[:quiet] ? '-q' : '' }`
|
107
|
+
`wget --user-agent=firefox -O #{tmp} '#{url}' #{options[:quiet] ? '-q' : '' }`
|
105
108
|
|
106
109
|
if $?.success?
|
107
110
|
if gziped(url)
|
@@ -197,7 +200,7 @@ module Open
|
|
197
200
|
sep2 = options[:sep2] || "|"
|
198
201
|
single = options[:single]
|
199
202
|
single = false if single.nil?
|
200
|
-
flatten = options[:flatten]
|
203
|
+
flatten = options[:flatten]
|
201
204
|
flatten = single if flatten.nil?
|
202
205
|
|
203
206
|
extra = [extra] if extra && ! extra.is_a?( Array)
|
@@ -214,7 +217,7 @@ module Open
|
|
214
217
|
next if exclude and exclude.call(l)
|
215
218
|
next if select and ! select.call(l)
|
216
219
|
|
217
|
-
row_fields = self.fields(l, sep)
|
220
|
+
row_fields = self.fields(l.chomp, sep)
|
218
221
|
id = row_fields[native]
|
219
222
|
next if id.nil? || id == ""
|
220
223
|
|
@@ -19,11 +19,21 @@ class TestPubMed < Test::Unit::TestCase
|
|
19
19
|
pmids = ['16438716', 17204154]
|
20
20
|
assert(PubMed.get_article(pmids)[pmid].title == "Discovering semantic features in the literature: a foundation for building functional associations.")
|
21
21
|
end
|
22
|
-
|
22
|
+
|
23
|
+
def test_full_text
|
24
|
+
pmid = '16438716'
|
25
|
+
assert(PubMed.get_article(pmid).full_text =~ /Discovering/)
|
26
|
+
end
|
27
|
+
|
23
28
|
def test_query
|
24
29
|
assert(PubMed.query('chagoyen[All Fields] AND ("loattrfull text"[sb] AND hasabstract[text])').include? '16438716')
|
25
30
|
end
|
26
31
|
|
32
|
+
def test_bibentry
|
33
|
+
assert("vazquez2008sent", PubMed::Article.make_bibentry('vazquez', 2008, "SENT: Semantic features in text"))
|
34
|
+
assert("vazquez2008aes", PubMed::Article.make_bibentry('vazquez', 2008, "An Example System"))
|
35
|
+
end
|
36
|
+
|
27
37
|
end
|
28
38
|
|
29
39
|
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 2
|
8
|
-
-
|
9
|
-
version: 1.2.
|
8
|
+
- 5
|
9
|
+
version: 1.2.5
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Miguel Vazquez
|
@@ -14,13 +14,14 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2010-
|
17
|
+
date: 2010-10-22 00:00:00 +02:00
|
18
18
|
default_executable: rbbt_config
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: rake
|
22
22
|
prerelease: false
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
24
25
|
requirements:
|
25
26
|
- - ">="
|
26
27
|
- !ruby/object:Gem::Version
|
@@ -35,6 +36,7 @@ dependencies:
|
|
35
36
|
name: simpleconsole
|
36
37
|
prerelease: false
|
37
38
|
requirement: &id002 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
38
40
|
requirements:
|
39
41
|
- - ">="
|
40
42
|
- !ruby/object:Gem::Version
|
@@ -47,6 +49,7 @@ dependencies:
|
|
47
49
|
name: stemmer
|
48
50
|
prerelease: false
|
49
51
|
requirement: &id003 !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
50
53
|
requirements:
|
51
54
|
- - ">="
|
52
55
|
- !ruby/object:Gem::Version
|
@@ -59,6 +62,7 @@ dependencies:
|
|
59
62
|
name: progress-monitor
|
60
63
|
prerelease: false
|
61
64
|
requirement: &id004 !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
62
66
|
requirements:
|
63
67
|
- - ">="
|
64
68
|
- !ruby/object:Gem::Version
|
@@ -71,6 +75,7 @@ dependencies:
|
|
71
75
|
name: simpleconsole
|
72
76
|
prerelease: false
|
73
77
|
requirement: &id005 !ruby/object:Gem::Requirement
|
78
|
+
none: false
|
74
79
|
requirements:
|
75
80
|
- - ">="
|
76
81
|
- !ruby/object:Gem::Version
|
@@ -150,6 +155,33 @@ files:
|
|
150
155
|
- tasks/install.rake
|
151
156
|
- LICENSE
|
152
157
|
- README.rdoc
|
158
|
+
- test/test_rbbt.rb
|
159
|
+
- test/rbbt/bow/test_bow.rb
|
160
|
+
- test/rbbt/bow/test_classifier.rb
|
161
|
+
- test/rbbt/bow/test_dictionary.rb
|
162
|
+
- test/rbbt/sources/test_organism.rb
|
163
|
+
- test/rbbt/sources/test_biomart.rb
|
164
|
+
- test/rbbt/sources/test_pubmed.rb
|
165
|
+
- test/rbbt/sources/test_polysearch.rb
|
166
|
+
- test/rbbt/sources/test_biocreative.rb
|
167
|
+
- test/rbbt/sources/test_entrez.rb
|
168
|
+
- test/rbbt/sources/test_go.rb
|
169
|
+
- test/rbbt/ner/test_rner.rb
|
170
|
+
- test/rbbt/ner/test_dictionaryNER.rb
|
171
|
+
- test/rbbt/ner/test_banner.rb
|
172
|
+
- test/rbbt/ner/test_rnorm.rb
|
173
|
+
- test/rbbt/ner/test_regexpNER.rb
|
174
|
+
- test/rbbt/ner/test_abner.rb
|
175
|
+
- test/rbbt/ner/rnorm/test_cue_index.rb
|
176
|
+
- test/rbbt/ner/rnorm/test_tokens.rb
|
177
|
+
- test/rbbt/util/test_misc.rb
|
178
|
+
- test/rbbt/util/test_tmpfile.rb
|
179
|
+
- test/rbbt/util/test_arrayHash.rb
|
180
|
+
- test/rbbt/util/test_filecache.rb
|
181
|
+
- test/rbbt/util/test_simpleDSL.rb
|
182
|
+
- test/rbbt/util/test_open.rb
|
183
|
+
- test/rbbt/util/test_index.rb
|
184
|
+
- test/test_helper.rb
|
153
185
|
has_rdoc: true
|
154
186
|
homepage: http://github.com/mikisvaz/rbbt
|
155
187
|
licenses: []
|
@@ -160,6 +192,7 @@ rdoc_options:
|
|
160
192
|
require_paths:
|
161
193
|
- lib
|
162
194
|
required_ruby_version: !ruby/object:Gem::Requirement
|
195
|
+
none: false
|
163
196
|
requirements:
|
164
197
|
- - ">="
|
165
198
|
- !ruby/object:Gem::Version
|
@@ -167,6 +200,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
167
200
|
- 0
|
168
201
|
version: "0"
|
169
202
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
203
|
+
none: false
|
170
204
|
requirements:
|
171
205
|
- - ">="
|
172
206
|
- !ruby/object:Gem::Version
|
@@ -176,7 +210,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
176
210
|
requirements: []
|
177
211
|
|
178
212
|
rubyforge_project:
|
179
|
-
rubygems_version: 1.3.
|
213
|
+
rubygems_version: 1.3.7
|
180
214
|
signing_key:
|
181
215
|
specification_version: 3
|
182
216
|
summary: Bioinformatics and text mining toolbox
|