rbbt 1.2.5 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +7 -0
  2. data/README.rdoc +2 -138
  3. metadata +69 -214
  4. data/LICENSE +0 -20
  5. data/bin/rbbt_config +0 -245
  6. data/install_scripts/classifier/R/classify.R +0 -36
  7. data/install_scripts/classifier/Rakefile +0 -140
  8. data/install_scripts/get_abner.sh +0 -2
  9. data/install_scripts/get_banner.sh +0 -25
  10. data/install_scripts/get_biocreative.sh +0 -72
  11. data/install_scripts/get_crf++.sh +0 -26
  12. data/install_scripts/get_entrez.sh +0 -4
  13. data/install_scripts/get_go.sh +0 -4
  14. data/install_scripts/get_polysearch.sh +0 -8
  15. data/install_scripts/ner/Rakefile +0 -206
  16. data/install_scripts/ner/config/default.rb +0 -52
  17. data/install_scripts/norm/Rakefile +0 -219
  18. data/install_scripts/norm/config/cue_default.rb +0 -10
  19. data/install_scripts/norm/config/tokens_default.rb +0 -86
  20. data/install_scripts/norm/functions.sh +0 -23
  21. data/install_scripts/organisms/Ath.Rakefile +0 -55
  22. data/install_scripts/organisms/Cal.Rakefile +0 -84
  23. data/install_scripts/organisms/Cel.Rakefile +0 -109
  24. data/install_scripts/organisms/Hsa.Rakefile +0 -140
  25. data/install_scripts/organisms/Mmu.Rakefile +0 -77
  26. data/install_scripts/organisms/Rakefile +0 -43
  27. data/install_scripts/organisms/Rno.Rakefile +0 -88
  28. data/install_scripts/organisms/Sce.Rakefile +0 -66
  29. data/install_scripts/organisms/Spo.Rakefile +0 -40
  30. data/install_scripts/organisms/rake-include.rb +0 -252
  31. data/install_scripts/wordlists/consonants +0 -897
  32. data/install_scripts/wordlists/stopwords +0 -1
  33. data/lib/rbbt.rb +0 -83
  34. data/lib/rbbt/bow/bow.rb +0 -88
  35. data/lib/rbbt/bow/classifier.rb +0 -116
  36. data/lib/rbbt/bow/dictionary.rb +0 -187
  37. data/lib/rbbt/ner/abner.rb +0 -34
  38. data/lib/rbbt/ner/banner.rb +0 -73
  39. data/lib/rbbt/ner/dictionaryNER.rb +0 -98
  40. data/lib/rbbt/ner/regexpNER.rb +0 -70
  41. data/lib/rbbt/ner/rner.rb +0 -227
  42. data/lib/rbbt/ner/rnorm.rb +0 -143
  43. data/lib/rbbt/ner/rnorm/cue_index.rb +0 -80
  44. data/lib/rbbt/ner/rnorm/tokens.rb +0 -217
  45. data/lib/rbbt/sources/biocreative.rb +0 -75
  46. data/lib/rbbt/sources/biomart.rb +0 -105
  47. data/lib/rbbt/sources/entrez.rb +0 -211
  48. data/lib/rbbt/sources/go.rb +0 -85
  49. data/lib/rbbt/sources/gscholar.rb +0 -74
  50. data/lib/rbbt/sources/organism.rb +0 -241
  51. data/lib/rbbt/sources/polysearch.rb +0 -117
  52. data/lib/rbbt/sources/pubmed.rb +0 -248
  53. data/lib/rbbt/util/arrayHash.rb +0 -266
  54. data/lib/rbbt/util/filecache.rb +0 -72
  55. data/lib/rbbt/util/index.rb +0 -47
  56. data/lib/rbbt/util/misc.rb +0 -106
  57. data/lib/rbbt/util/open.rb +0 -251
  58. data/lib/rbbt/util/rake.rb +0 -183
  59. data/lib/rbbt/util/simpleDSL.rb +0 -87
  60. data/lib/rbbt/util/tmpfile.rb +0 -35
  61. data/tasks/install.rake +0 -124
  62. data/test/rbbt/bow/test_bow.rb +0 -33
  63. data/test/rbbt/bow/test_classifier.rb +0 -72
  64. data/test/rbbt/bow/test_dictionary.rb +0 -91
  65. data/test/rbbt/ner/rnorm/test_cue_index.rb +0 -57
  66. data/test/rbbt/ner/rnorm/test_tokens.rb +0 -70
  67. data/test/rbbt/ner/test_abner.rb +0 -17
  68. data/test/rbbt/ner/test_banner.rb +0 -17
  69. data/test/rbbt/ner/test_dictionaryNER.rb +0 -122
  70. data/test/rbbt/ner/test_regexpNER.rb +0 -33
  71. data/test/rbbt/ner/test_rner.rb +0 -126
  72. data/test/rbbt/ner/test_rnorm.rb +0 -47
  73. data/test/rbbt/sources/test_biocreative.rb +0 -38
  74. data/test/rbbt/sources/test_biomart.rb +0 -31
  75. data/test/rbbt/sources/test_entrez.rb +0 -49
  76. data/test/rbbt/sources/test_go.rb +0 -24
  77. data/test/rbbt/sources/test_organism.rb +0 -59
  78. data/test/rbbt/sources/test_polysearch.rb +0 -27
  79. data/test/rbbt/sources/test_pubmed.rb +0 -39
  80. data/test/rbbt/util/test_arrayHash.rb +0 -257
  81. data/test/rbbt/util/test_filecache.rb +0 -37
  82. data/test/rbbt/util/test_index.rb +0 -31
  83. data/test/rbbt/util/test_misc.rb +0 -20
  84. data/test/rbbt/util/test_open.rb +0 -110
  85. data/test/rbbt/util/test_simpleDSL.rb +0 -57
  86. data/test/rbbt/util/test_tmpfile.rb +0 -21
  87. data/test/test_helper.rb +0 -4
  88. data/test/test_rbbt.rb +0 -11
@@ -1,241 +0,0 @@
1
- require 'rbbt'
2
- require 'rbbt/util/open'
3
- require 'rbbt/util/index'
4
-
5
# This module contains some Organism centric functionalities. Each organism is
# identified by a keyword, which is also the name of its directory under
# <datadir>/organisms.
module Organism

  # Raised when trying to access information for an organism that has not been
  # prepared already.
  class OrganismNotProcessedError < StandardError; end

  # Path of +file+ inside the data directory of organism +org+, i.e.
  # <datadir>/organisms/<org>/<file>.
  def self.organism_file(org, file)
    File.join(Rbbt.datadir, "organisms/#{ org }/#{ file }")
  end
  private_class_method :organism_file

  # Return the list of all supported organisms (the names of the directories
  # under <datadir>/organisms). When +prepared+ is true only the organisms
  # whose directory contains an 'identifiers' file are listed.
  def self.all(prepared = true)
    organisms_dir = File.join(Rbbt.datadir, 'organisms')

    if prepared
      Dir.glob(File.join(organisms_dir, '*', 'identifiers')).collect { |f| File.basename(File.dirname(f)) }
    else
      Dir.glob(File.join(organisms_dir, '*')).select { |f| File.directory?(f) }.collect { |f| File.basename(f) }
    end
  end

  # Return the complete name of an organism, as stored in its 'name' file.
  # The org parameter is the organism keyword.
  #
  # Raises OrganismNotProcessedError if the 'name' file does not exist.
  #
  # NOTE: this singleton method shadows Module#name for this module, so
  # Organism.name can no longer be called without arguments.
  def self.name(org)
    name_file = organism_file(org, 'name')
    raise OrganismNotProcessedError, "Missing 'name' file" unless File.exist?(name_file)
    Open.read(name_file)
  end

  # Hash linking the (stripped, downcased) complete name of every prepared
  # organism with its keyword in Rbbt. It is the inverse of the name method.
  # It is filled when the module is loaded, so loading raises
  # OrganismNotProcessedError if a prepared organism has no 'name' file.
  NAME2ORG = {}
  Organism.all.each do |org|
    NAME2ORG[Organism.name(org).strip.downcase] = org
  end

  # Return the keyword associated with an organism name (nil if unknown).
  # The lookup ignores case and surrounding whitespace.
  def self.name2org(name)
    NAME2ORG[name.strip.downcase]
  end

  # FIXME: The NER related stuff is harder to install, that is why we hide the
  # requires next to where they are needed, next to options

  # Return a NER object which could be of NER (type :rner), Abner or Banner
  # class; this is selected using the type parameter.
  #
  # For :rner the model is taken, in this order, from options[:model], from
  # <datadir>/ner/model/<org> if that path exists, or from
  # <datadir>/ner/model/BC2.
  #
  # Raises a RuntimeError for any other type.
  def self.ner(org, type = :rner, options = {})
    case type.to_sym
    when :abner
      require 'rbbt/ner/abner'
      Abner.new
    when :banner
      require 'rbbt/ner/banner'
      Banner.new
    when :rner
      require 'rbbt/ner/rner'
      organism_model = File.join(Rbbt.datadir, "ner/model/#{ org }")
      model = options[:model]
      model ||= organism_model if File.exist?(organism_model)
      model ||= File.join(Rbbt.datadir, 'ner/model/BC2')
      NER.new(model)
    else
      raise "Ner type (#{ type }) unknown"
    end
  end

  # Return a normalization object (Normalizer) built on the organism lexicon.
  #
  # If +to_entrez+ is not given, an index is built with id_index using
  # 'Entrez Gene Id' as native format and the first supported format as the
  # other one. The token configuration file
  # <datadir>/norm/config/<org>.config is only passed along if it exists.
  def self.norm(org, to_entrez = nil)
    require 'rbbt/ner/rnorm'

    if to_entrez.nil?
      to_entrez = id_index(org, :native => 'Entrez Gene Id', :other => [supported_ids(org).first])
    end

    token_file = File.join(Rbbt.datadir, 'norm', 'config', org.to_s + '.config')
    token_file = nil unless File.exist?(token_file)

    Normalizer.new(organism_file(org, 'lexicon'), :to_entrez => to_entrez, :file => token_file, :max_candidates => 20)
  end

  # Returns a hash with the names associated with each gene id, read from the
  # organism 'lexicon' file. The given options are merged over the defaults
  # (fields separated by tab or '|', flattened values) and handed to
  # Open.to_hash.
  def self.lexicon(org, options = {})
    options = { :sep => "\t|\\|", :flatten => true }.merge(options)
    Open.to_hash(organism_file(org, 'lexicon'), options)
  end

  # Returns a hash with the list of go terms for each gene id, read from the
  # organism 'gene.go' file.
  def self.goterms(org)
    Open.to_hash(organism_file(org, 'gene.go'), :flatten => true)
  end

  # Return the list of PubMed ids associated to the organism: every run of
  # digits found in the organism 'all.pmid' file.
  def self.literature(org)
    Open.read(organism_file(org, 'all.pmid')).scan(/\d+/)
  end

  # Return hash that associates genes to a list of PubMed ids, read from the
  # organism 'gene.pmid' file.
  def self.gene_literature(org)
    Open.to_hash(organism_file(org, 'gene.pmid'), :flatten => true)
  end

  # Return hash that associates genes to a list of PubMed ids, read from the
  # organism 'gene_go.pmid' file.
  def self.gene_literature_go(org)
    Open.to_hash(organism_file(org, 'gene_go.pmid'), :flatten => true)
  end

  # Returns a list with the names of the id formats supported for an organism,
  # taken from the first line of its 'identifiers' file when that line starts
  # with '#'. If examples are requested, the list is of [format, example]
  # pairs.
  #
  # If the first line is not a '#' header no format is known: the result is
  # then an empty list (that first line is not used as data either).
  #
  # *Options:*
  #
  # *examples:* Include example ids for each format. The examples come from
  # the line with the most non-blank fields; only the first of several
  # '|' separated ids is kept.
  def self.supported_ids(org, options = {})
    want_examples = options[:examples]
    formats = []
    examples = []
    first_line = true

    Open.read(organism_file(org, 'identifiers')).each_line do |line|
      if first_line
        first_line = false
        if line =~ /^\s*#/
          formats = Open.fields(line.sub(/^[\s#]+/, '')).collect { |n| n.strip }
        end
        # Without examples only the header matters, whether it was found or not.
        return formats unless want_examples
        next
      end

      fields = Open.fields(line)
      if fields.select { |name| name && name =~ /\w/ }.length > examples.compact.length
        # to_s: the check above shows that fields may be nil.
        examples = fields.collect { |name| name.to_s.split(/\|/).first }
      end
    end

    want_examples ? formats.zip(examples) : formats
  end

  # Creates a hash where each possible id (downcased) is associated with the
  # names of the formats it appears under (it is potentially possible for
  # different formats to have the same id). This is used in the guessIdFormat
  # method.
  def self.id_formats(org)
    id_types = {}
    formats = supported_ids(org)
    first_line = true

    Open.read(organism_file(org, 'identifiers')).each_line do |line|
      header = first_line && line =~ /^\s*#/
      first_line = false
      # The header holds the format names (see supported_ids), not ids.
      next if header

      formats.zip(Open.fields(line)).each do |format, ids|
        (ids || "").split(/\|/).each do |id|
          next if id.nil? || id == ""
          key = id.downcase
          id_types[key] ||= []
          id_types[key] << format unless id_types[key].include?(format)
        end
      end
    end

    id_types
  end

  # Guess the id format used by the ids in +query+.
  #
  # +formats+ is either a hash like the one returned by id_formats or, if it
  # is a String, the keyword of the organism whose id_formats hash is used.
  #
  # Returns the [format, count] pair of the format matching the most query
  # ids, considering only formats matched by more than a tenth (integer
  # division) of the distinct query ids, or nil if there is none.
  def self.guessIdFormat(formats, query)
    query = query.compact.collect { |gene| gene.downcase }.uniq
    formats = id_formats(formats) if String === formats

    return nil if formats.values.empty?
    values = formats.values_at(*query)
    return nil if values.empty?

    format_count = Hash.new(0)
    values.compact.collect { |types| types.uniq }.flatten.each do |format|
      format_count[format] += 1
    end

    return nil if format_count.values.empty?
    format_count.select { |_, count| count > (query.length / 10) }.sort { |a, b| b[1] <=> a[1] }.first
  end

  # Return the position of the format +id_name+ in the +supported_ids+ list.
  # Names are compared after stripping them, and ignoring case unless
  # options[:case_sensitive] is set. If several names match, the last
  # position wins.
  #
  # NOTE(review): an unknown name yields 0, the same as a match on the first
  # format, so callers cannot detect a missing format.
  def self.id_position(supported_ids, id_name, options = {})
    pos = 0
    supported_ids.each_with_index do |id, i|
      exact = id.strip == id_name.strip
      relaxed = !options[:case_sensitive] && id.strip.downcase == id_name.strip.downcase
      pos = i if exact || relaxed
    end
    pos
  end

  # Build an index over the organism 'identifiers' file using Index.index.
  #
  # *Options:*
  #
  # *native:* name of the format used as the native (target) column, the
  # first column if missing.
  #
  # *other:* list of names of the formats to index, all but the native one if
  # missing.
  #
  # *case_sensitive:* defaults to false.
  #
  # If neither native nor other are given the options are handed to
  # Index.index as they are. Otherwise the format names are translated into
  # column positions with id_position (so unknown names silently fall back to
  # column 0) and passed as :native and :extra, with tab as separator. The
  # options hash of the caller is not modified.
  def self.id_index(org, options = {})
    options = options.dup
    options[:case_sensitive] = false if options[:case_sensitive].nil?

    native = options[:native]
    other = options[:other]
    identifiers = organism_file(org, 'identifiers')

    return Index.index(identifiers, options) if native.nil? && other.nil?

    supported = supported_ids(org)
    first = native ? id_position(supported, native, options) : 0
    rest = if other
             other.collect { |name| id_position(supported, name, options) }
           else
             (0..supported.length - 1).to_a - [first]
           end

    options[:native] = first
    options[:extra] = rest
    options[:sep] = "\t"
    Index.index(identifiers, options)
  end

end
241
-
@@ -1,117 +0,0 @@
1
- require 'rbbt'
2
- require 'rbbt/util/open'
3
- require 'rbbt/ner/regexpNER'
4
- require 'rbbt/ner/dictionaryNER'
5
-
6
# Find terms in the Polysearch thesauri using simple regular expression
# matching. Note that the first time the methods are used the corresponding
# thesaurus is loaded into memory and kept there. The available thesauri are:
# disease, drug, metabolite, organ, subcellular (subcellular localization) and
# tissue.
module Polysearch

  # File holding the thesaurus +type+:
  # <datadir>/dbs/polysearch/<type>.txt
  def self.thesaurus_file(type)
    File.join(Rbbt.datadir, 'dbs', 'polysearch', type.to_s + '.txt')
  end
  private_class_method :thesaurus_file

  # Cache of the code => name hashes, one per thesaurus type.
  @@names = {}
  def self.type_names(type) #:nodoc:
    @@names[type] ||= Open.to_hash(thesaurus_file(type), :single => true)
  end

  # Cache of the RegExpNER indexes, one per thesaurus type.
  @@indexes = {}
  def self.type_index(type) #:nodoc:
    @@indexes[type] ||= begin
      # Work on a copy: appending to $stopwords itself would grow the global
      # list on every call and leak 'use' into the other thesauri.
      stopwords = $stopwords ? $stopwords.dup : []
      stopwords << 'use' if type.to_sym == :disease
      RegExpNER.new(thesaurus_file(type), :stopwords => stopwords)
    end
  end

  # Find matches in a string of text. The types parameter specifies which
  # thesauri to use (a single type or an array of them); if it is nil all of
  # them are used. The matches of each thesaurus, taken in sorted order of
  # type, are merged into a single hash.
  def self.match(text, types = nil)
    types = %w(disease drug metabolite organ subcellular tissue) if types.nil?
    types = [types] unless Array === types

    matches = {}
    types.sort.each do |type|
      matches.merge!(type_index(type).match(text))
    end
    matches
  end

  # Transform the code into a name, type is the thesaurus to use
  def self.name(type, code)
    type_names(type)[code]
  end

end
59
-
60
# Ad-hoc benchmark and profile of Polysearch.match on a sample abstract. It
# only runs when this file is executed directly.
if __FILE__ == $0
  text = <<-EOT

Background Microorganisms adapt their transcriptome by integrating
multiple chemical and physical signals from their environment. Shake-flask
cultivation does not allow precise manipulation of individual culture
parameters and therefore precludes a quantitative analysis of the
(combinatorial) influence of these parameters on transcriptional
regulation. Steady-state chemostat cultures, which do enable accurate
control, measurement and manipulation of individual cultivation parameters
(e.g. specific growth rate, temperature, identity of the growth-limiting
nutrient) appear to provide a promising experimental platform for such a
combinatorial analysis. Results A microarray compendium of 170
steady-state chemostat cultures of the yeast Saccharomyces cerevisiae is
presented and analyzed. The 170 microarrays encompass 55 unique
conditions, which can be characterized by the combined settings of 10
different cultivation parameters. By applying a regression model to assess
the impact of (combinations of) cultivation parameters on the
transcriptome, most S. cerevisiae genes were shown to be influenced by
multiple cultivation parameters, and in many cases by combinatorial
effects of cultivation parameters. The inclusion of these combinatorial
effects in the regression model led to higher explained variance of the
gene expression patterns and resulted in higher function enrichment in
subsequent analysis. We further demonstrate the usefulness of the
compendium and regression analysis for interpretation of shake-flask-based
transcriptome studies and for guiding functional analysis of
(uncharacterized) genes and pathways. Conclusions Modeling the
combinatorial effects of environmental parameters on the transcriptome is
crucial for understanding transcriptional regulation. Chemostat
cultivation offers a powerful tool for such an approach. Keywords:
chemostat steady state samples Cerebellar stroke syndrome


  EOT

  require 'benchmark'
  require 'ruby-prof'

  # Time a single match; it is the first one, so it includes loading the
  # thesaurus.
  first_run = Benchmark.measure do
    p Polysearch.match(text, 'disease')
  end
  puts first_run

  # Profile one more match.
  RubyProf.start
  Polysearch.match(text, 'disease')
  result = RubyProf.stop

  # Print a flat profile to text
  printer = RubyProf::FlatPrinter.new(result)
  printer.print(STDOUT, 0)

  # Time ten matches in a row.
  ten_runs = Benchmark.measure do
    10.times { p Polysearch.match(text, 'disease') }
  end
  puts ten_runs
end
@@ -1,248 +0,0 @@
1
- require 'rbbt/util/filecache'
2
- require 'rbbt/util/open'
3
- require 'rbbt/sources/gscholar'
4
- require 'rbbt'
5
- require 'libxml'
6
-
7
# This module offers an interface with PubMed, to perform queries, and
# retrieve simple information from articles. It uses the caching
# services of Rbbt.
module PubMed

  # Time of the last request to efetch and minimum number of seconds to leave
  # between two requests.
  @@last = Time.now
  @@pubmed_lag = 1

  # Download the xml of the articles from PubMed, waiting if needed so that
  # requests are at least @@pubmed_lag seconds apart.
  #
  # If +pmids+ is an Array it returns a hash with the xml of each article
  # found, indexed by the content of its first PMID element; otherwise it
  # returns the xml of the first article found (nil if there is none).
  #
  # This method is meant for internal use. It was never actually private:
  # the 'private' keyword does not apply to methods defined with 'def self.'.
  def self.get_online(pmids)
    pmid_list = pmids.is_a?(Array) ? pmids.join(',') : pmids.to_s
    url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&retmode=xml&id=#{pmid_list}"

    elapsed = Time.now - @@last
    sleep(@@pubmed_lag - elapsed) unless elapsed > @@pubmed_lag

    xml = Open.read(url, :quiet => true, :nocache => true)

    @@last = Time.now

    # 'm' lets '.' match newlines. The former extra 's' flag was dropped: in
    # Ruby it selects an encoding (Windows-31J) instead of Perl's dot-all
    # and, as far as I can tell, was overridden by the 'u' that follows it.
    articles = xml.scan(/(<PubmedArticle>.*?<\/PubmedArticle>)/mu).flatten

    return articles.first unless pmids.is_a?(Array)

    list = {}
    articles.each do |article|
      pmid = article.scan(/<PMID>(.*?)<\/PMID>/).flatten.first
      list[pmid] = article
    end
    list
  end

  # Processes the xml of an article as served by MedLine and extracts
  # the abstract, title and journal information
  class Article

    # Pairs of attribute name and XPath, relative to the Article element,
    # of the node holding its value.
    XML_KEYS = [
      [:title    , "ArticleTitle"],
      [:journal  , "Journal/Title"],
      [:issue    , "Journal/JournalIssue/Issue"],
      [:volume   , "Journal/JournalIssue/Volume"],
      [:issn     , "Journal/ISSN"],
      [:year     , "Journal/JournalIssue/PubDate/Year"],
      [:month    , "Journal/JournalIssue/PubDate/Month"],
      [:pages    , "Pagination/MedlinePgn"],
      [:abstract , "Abstract/AbstractText"],
    ]

    # Template of the url of the pdf of an article in PMC. PMCID is replaced
    # by the actual id.
    PMC_PDF_URL = "http://www.ncbi.nlm.nih.gov/pmc/articles/PMCID/pdf/"

    # Wrap in braces the words with two or more consecutive capital letters,
    # so that BibTeX preserves their case.
    def self.escape_title(title)
      title.gsub(/(\w*[A-Z][A-Z]+\w*)/, '{\1}')
    end

    # Build a BibTeX key by joining the last name (whitespace turned into
    # '_'), the year ("NOYEAR" if nil) and an abbreviation of the title: its
    # first word if it has more than 3 characters, otherwise the initials of
    # its first three words. A nil or wordless title gives an empty
    # abbreviation.
    def self.make_bibentry(lastname, year, title)
      words = title.to_s.downcase.scan(/\w+/)
      abrev = if words.first && words.first.length > 3
                words.first
              else
                words[0..2].collect { |w| w.chars.first } * ""
              end
      [lastname.gsub(/\s/, '_'), year || "NOYEAR", abrev] * ""
    end

    # Parse the xml of a PubmedArticle and return a hash with: :pmid, the
    # XML_KEYS attributes that are present, :author (the authors as
    # "LastName, ForeName", joined by " and "), :bibentry (downcased BibTeX
    # key built from the first author with a last name, only if there is
    # one) and :pmc_pdf (url of the pdf in PMC, nil without a pmc id).
    def self.parse_xml(xml)
      pubmed  = LibXML::XML::Parser.string(xml).parse.find("/PubmedArticle").first
      medline = pubmed.find("MedlineCitation").first
      article = medline.find("Article").first

      info = {}
      info[:pmid] = medline.find("PMID").first.content

      XML_KEYS.each do |name, key|
        node = article.find(key).first
        info[name] = node.content unless node.nil?
      end

      bibentry = nil
      authors = article.find("AuthorList/Author").collect do |author|
        last_node = author.find("LastName").first
        fore_node = author.find("ForeName").first

        lastname = last_node && last_node.content
        # Single letter names are initials, add the period.
        forename = fore_node && fore_node.content.split(/\s/).collect { |word| word.length == 1 ? word + '.' : word } * " "

        bibentry ||= make_bibentry(lastname, info[:year], info[:title]) if lastname

        # Authors with neither name (e.g. no LastName element) are dropped
        # instead of being listed as ", ".
        names = [lastname, forename].compact
        names.empty? ? nil : names * ", "
      end
      info[:author] = authors.compact * " and "

      info[:bibentry] = bibentry.downcase if bibentry

      pmc_id = pubmed.find("PubmedData/ArticleIdList/ArticleId").detect { |id| id[:IdType] == "pmc" }
      info[:pmc_pdf] = pmc_id && PMC_PDF_URL.sub(/PMCID/, pmc_id.content)

      info
    end

    attr_accessor :author, :pmid, :bibentry, :pmc_pdf, :gscholar_pdf
    attr_accessor(*XML_KEYS.collect { |name, _| name })
    # Only the writer: the reader is the pdf_url method defined below.
    attr_writer :pdf_url

    # Fill the attributes from the xml of the article. With nil or empty xml
    # all the attributes are left unset.
    def initialize(xml)
      return if xml.nil? || xml.empty?

      PubMed::Article.parse_xml(xml).each do |key, value|
        send("#{ key }=", value)
      end
    end

    # Url of the pdf of the article: the one in PMC if known, otherwise
    # whatever GoogleScholar::full_text_url finds for the title. A nil result
    # from Google Scholar is not remembered, so it is looked up again on the
    # next call.
    def pdf_url
      return pmc_pdf if pmc_pdf
      @gscholar_pdf ||= GoogleScholar::full_text_url title
    end

    # Download the pdf with wget and return its text as extracted by
    # pdftotext. Returns nil if there is no pdf url or no text file was
    # produced.
    def full_text
      url = pdf_url
      return nil if url.nil?

      text = nil
      TmpFile.with_file do |pdf|
        # The url comes from the network: run the commands without a shell so
        # that it cannot be interpreted as shell syntax.
        # Change user-agent, oh well...
        system('wget', '--user-agent=firefox', url, '-O', pdf)
        TmpFile.with_file do |txt|
          system('pdftotext', pdf, txt)
          text = Open.read(txt) if File.exist?(txt)
        end
      end

      text
    end

    # BibTeX entry for the article, with one field for the author and for
    # each of the XML_KEYS attributes that is set (issue is written as
    # 'number'), plus fulltext (if there is a pdf url) and pmid.
    def bibtex
      keys = [:author] + XML_KEYS.collect { |name, _| name } - [:bibentry]
      entry = "@article{#{bibentry},\n"

      keys.each do |key|
        value = send(key)
        next if value.nil?

        case key
        when :title
          entry << " title = { #{ PubMed::Article.escape_title value } },\n"
        when :issue
          entry << " number = { #{ value } },\n"
        else
          entry << " #{ key } = { #{ value } },\n"
        end
      end

      # Ask only once, pdf_url may need to query Google Scholar.
      url = pdf_url
      entry << " fulltext = { #{ url } },\n" if url
      entry << " pmid = { #{ pmid } }\n}"

      entry
    end

    # Join the text from title and abstract
    def text
      [title, abstract].join("\n")
    end
  end

  # Returns the Article object containing the information for the PubMed
  # ID specified as an argument. If +pmid+ is an array instead of a single
  # identifier it returns a hash with the Article object for each id.
  # It uses the Rbbt cache to save the articles xml.
  #
  # In the hash, the articles found in the cache are indexed by the id as it
  # was given, the ones downloaded by the id as returned by get_online. The
  # downloads are done in groups of at most 100 ids.
  def self.get_article(pmid)
    unless pmid.is_a?(Array)
      filename = pmid.to_s + '.xml'
      cached = FileCache.path(filename)
      return Article.new(Open.read(cached)) if File.exist?(cached)

      xml = get_online(pmid)
      FileCache.add_file(filename, xml)
      return Article.new(xml)
    end

    list = {}
    missing = []
    pmid.each do |p|
      cached = FileCache.path(p.to_s + '.xml')
      if File.exist?(cached)
        list[p] = Article.new(Open.read(cached))
      else
        missing << p
      end
    end

    return list unless missing.any?

    # each_slice gives disjoint groups. The previous inclusive ranges made
    # consecutive groups share one id and hold 101 of them.
    articles = {}
    missing.each_slice(100) do |chunk|
      articles.merge!(get_online(chunk))
    end

    articles.each do |p, xml|
      FileCache.add_file(p + '.xml', xml, :force => true)
      list[p] = Article.new(xml)
    end

    list
  end

  # Performs the specified query and returns an array with the PubMed
  # Ids returned. +retmax+ can be used to limit the number of ids
  # returned, if it is not specified 30000 is used.
  #
  # NOTE(review): the query is placed in the url as it is, so it presumably
  # has to be url-encoded by the caller.
  def self.query(query, retmax = nil)
    retmax ||= 30000

    Open.read("http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?retmax=#{retmax}&db=pubmed&term=#{query}", :quiet => true, :nocache => true).scan(/<Id>(\d+)<\/Id>/).flatten
  end
end