bivy 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/.gitignore +21 -0
  2. data/History +16 -0
  3. data/LICENSE +29 -0
  4. data/README.rdoc +37 -0
  5. data/Rakefile +43 -0
  6. data/TODO +12 -0
  7. data/VERSION +1 -0
  8. data/bin/bivy.rb +56 -0
  9. data/bin/pubmed_to_bivy.rb +78 -0
  10. data/doc/config.yaml +13 -0
  11. data/doc/src/default.css +126 -0
  12. data/doc/src/default.template +38 -0
  13. data/doc/src/tutorial/basic_flow.page +71 -0
  14. data/doc/src/tutorial/index.page +8 -0
  15. data/doc/src/tutorial/new_formats_and_media.page +83 -0
  16. data/jrn_abbrev/abbr_html.tgz +0 -0
  17. data/jrn_abbrev/abbr_to_journal.yaml +676 -0
  18. data/jrn_abbrev/download_abbrevs.rb +20 -0
  19. data/jrn_abbrev/for_ruby_class.rb +686 -0
  20. data/jrn_abbrev/html_to_yaml.rb +50 -0
  21. data/lib/bibliography.rb +144 -0
  22. data/lib/bivy.rb +4 -0
  23. data/lib/citation.rb +194 -0
  24. data/lib/format.rb +120 -0
  25. data/lib/format/acs.rb +88 -0
  26. data/lib/format/bioinformatics.rb +33 -0
  27. data/lib/format/bmc.rb +38 -0
  28. data/lib/format/jtp.rb +30 -0
  29. data/lib/format/mla.rb +50 -0
  30. data/lib/formatter.rb +276 -0
  31. data/lib/journal.rb +6 -0
  32. data/lib/journal/iso_to_full.yaml +1320 -0
  33. data/lib/journal/medline_to_full.yaml +7 -0
  34. data/lib/journal/medline_to_iso.yaml +45 -0
  35. data/lib/media.rb +88 -0
  36. data/lib/media/html.rb +65 -0
  37. data/lib/ooffice.rb +39 -0
  38. data/lib/pubmed.rb +209 -0
  39. data/lib/rtf.rb +217 -0
  40. data/old_stuff/old_list2refs.rb +103 -0
  41. data/old_stuff/pubmed2html.rb +119 -0
  42. data/old_stuff/pubmed_bib_write.rb +92 -0
  43. data/old_stuff/xml.tmp.xml +115 -0
  44. data/scripts/merge_bibs.rb +70 -0
  45. data/spec/bibliography_spec.rb +127 -0
  46. data/spec/citation_positions.odt +0 -0
  47. data/spec/formatter_spec.rb +14 -0
  48. data/spec/formatter_spec/cits_after.xml +2 -0
  49. data/spec/formatter_spec/cits_before.xml +2 -0
  50. data/spec/formatter_spec/content.xml +2 -0
  51. data/spec/ooffice_spec.rb +27 -0
  52. data/spec/pubmed_spec.rb +26 -0
  53. data/spec/spec_helper.rb +7 -0
  54. data/spec/testfiles/doc1.odt +0 -0
  55. metadata +136 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ Anal Chem: Analytical Chemistry
3
+ Mol Cell Proteomics: Molecular and Cellular Proteomics
4
+ Cell Signal: Cellular Signalling
5
+ J Proteome Res: Journal of Proteome Research
6
+ Nat Chem Biol: Nature Chemical Biology
7
+ Nat Biotechnol: Nature Biotechnology
@@ -0,0 +1,45 @@
1
+ ---
2
+ Nature: Nature
3
+ Biomed Mass Spectrom: Biomed. Mass Spectrom.
4
+ Nat Biotechnol: Nat. Biotechnol.
5
+ Anal. Chem.: Anal. Chem.
6
+ Nat Chem Biol: Nat. Chem. Biol.
7
+ J. Proteome Res.: J. Proteome Res.
8
+ Rapid Commun Mass Spectrom: Rapid Commun. Mass Spectrom.
9
+ KDD Workshop on Mining Temporal and Sequential Data: KDD Workshop MTSD
10
+ J Chromatogr B Analyt Technol Biomed Life Sci: J. Chromatogr., B
11
+ J. Chromatogr., A: J. Chromatogr., A
12
+ J Chemom: J. Chemom.
13
+ Genome Res: Genome Res.
14
+ Biomed. Mass Spectrom.: Biomed. Mass Spectrom.
15
+ SIAM J. Num. Anal.: SIAM J. Num. Anal.
16
+ Proteomics: Proteomics
17
+ J. Chemom.: J. Chemom.
18
+ Eur. Food Res. Technol.: Eur. Food Res. Technol.
19
+ Environ Sci Technol: Environ. Sci. Technol.
20
+ Analytica Chimica Acta: Anal. Chim. Acta
21
+ Nature Methods: Nature Methods
22
+ J Chromatogr A: J. Chromatogr., A
23
+ IEEE ASSP: IEEE ASSP
24
+ Nucleic Acids Res: Nucleic Acids Res.
25
+ Mol Cell Proteomics: Mol. Cell. Proteomics
26
+ J Proteome Res: J. Proteome Res.
27
+ J Mol Biol: J. Mol. Biol.
28
+ J. Am. Soc. Mass Spectrom.: J. Am. Soc. Mass Spectrom.
29
+ J. Mol. Biol.: J. Mol. Biol.
30
+ Bioinformatics: Bioinformatics
31
+ Anal Chem: Anal. Chem.
32
+ Mol Cell Proteomics: Mol. Cell. Proteomics
33
+ Cell Signal: Cell Signal.
34
+ Curr Opin Chem Biol: Curr. Opin. Chem. Biol.
35
+ Proc IEEE Comput Syst Bioinform Conf: Proc. IEEE Comput. Syst. Bioinform. Conf.
36
+ Lab Chip: Lab Chip
37
+ Clin J Oncol Nurs: Clin. J. Oncol. Nurs.
38
+ Mass Spectrom Rev: Mass Spectrom. Rev.
39
+ Nat Protoc: Nat. Protoc.
40
+ Conf Proc IEEE Eng Med Biol Soc: Conf. Proc. IEEE Eng. Med. Biol. Soc.
41
+ Curr Opin Oncol: Curr. Opin. Oncol.
42
+ J Mass Spectrom: J. Mass Spectrom.
43
+ Trends Pharmacol Sci: Trends Pharmacol. Sci.
44
+ Assay Drug Dev Technol: Assay Drug Dev. Technol.
45
+ Mol Pharmacol: Mol. Pharmacol.
data/lib/media.rb ADDED
@@ -0,0 +1,88 @@
1
+
2
# Mixin with the markup helpers used while rendering a bibliography, plus a
# factory (Media.new) that loads and instantiates a concrete media class.
module Media
  # note that you need to add the shortcut to module Format::MediaForwarding
  # hash if you want to be able to access it!

  # Maps a media file name (as a symbol) to its class name for the cases where
  # the class name is not simply the capitalized file name
  # (e.g. :html => 'HTML'). Media files add their own entry when they are
  # loaded, so this hash has to stay mutable.
  Symbol_to_class_string = { }

  # Loads "media/<tp>" and returns a new instance of the matching class.
  #
  # tp:: the media type, as a Symbol or a String (normalized with to_sym).
  #      Defaults to :html. The previous default, :jtp, names a citation
  #      format rather than a media, so a bare Media.new could only end in a
  #      LoadError.
  #
  # Raises LoadError when no "media/<tp>" file is on the load path and
  # NameError when the loaded file does not define the expected class.
  def self.new(tp=:html)
    tp = tp.to_sym
    # deliberate lazy load: a media implementation is only required on demand
    require "media/#{tp}"
    klass_st = Symbol_to_class_string[tp] || tp.to_s.capitalize
    Media.const_get(klass_st).new
  end

  # Text emitted before the bibliography (nothing by default).
  def header
  end

  # Text emitted after the bibliography (nothing by default).
  def footer
  end

  # Applies the decoration +method+ to string.to_s.
  # Returns nil (without calling +method+) when +string+ is nil or false, so
  # missing citation fields simply produce nothing.
  def call_it(method, string)
    return nil unless string
    send(method, string.to_s)
  end

  # Wraps a String in parentheses.
  def parenthesize(string)
    '(' + string + ')'
  end

  # Wraps a String in square brackets.
  def bracket(string)
    '[' + string + ']'
  end

  # nil-safe bracket
  def br(string)
    call_it(:bracket, string)
  end

  # nil-safe parenthesize
  def par(string)
    call_it(:parenthesize, string)
  end

  # italicize (nil-safe)
  def i(string)
    call_it(:italics, string)
  end

  # bold (nil-safe)
  def b(string)
    call_it(:bold, string)
  end

  # underline (nil-safe)
  def u(string)
    call_it(:underline, string)
  end

  # Default italics markup (an inline-styled span).
  def italics(string)
    "<span style=\"font-style:italic;\">" + string + "</span>"
  end

  # Default bold markup (an inline-styled span).
  def bold(string)
    "<span style=\"font-weight:bold;\">" + string + "</span>"
  end

  # Default underline markup (an inline-styled span).
  def underline(string)
    "<span style=\"text-decoration:underline;\">" + string + "</span>"
  end

  # create the final bibliography string in whatever media you desire
  # the example here is html
  #
  # format_object:: anything responding to format(citation) with a String
  # citations::     the citations to render, in output order
  #
  # Returns an ordered list with one <li> per citation.
  def format(format_object, citations)
    items = citations.map do |cit|
      " <li>" + format_object.format(cit) + "</li>"
    end
    "<ol>\n" + items.join("\n") + "\n</ol>\n"
  end

end
87
+
88
+
data/lib/media/html.rb ADDED
@@ -0,0 +1,65 @@
1
+
2
# HTML media: renders a bibliography as an ordered list inside a minimal
# <html><body> page, using inline-styled spans for text decoration.
class Media::HTML
  include Media
  # register the class name, since "html".capitalize would give "Html"
  Media::Symbol_to_class_string[:html] = 'HTML'

  # Opening markup of the page.
  def header
    "<html><body>"
  end

  # Closing markup of the page.
  def footer
    "</body></html>"
  end

  def italics(string)
    "<span style=\"font-style:italic;\">" + string + "</span>"
  end

  def bold(string)
    "<span style=\"font-weight:bold;\">" + string + "</span>"
  end

  def underline(string)
    "<span style=\"text-decoration:underline;\">" + string + "</span>"
  end

  # Wraps already-formatted citation strings in an ordered list, one
  # tab-indented <li> per citation.
  def list(citations_as_strings)
    items = citations_as_strings.map do |cit|
      "\t<li>#{cit}</li>"
    end
    "<ol>\n" + items.join("\n") + "\n</ol>\n"
  end

  # Makes sure the text ends with sentence punctuation.
  #
  # expects opening and closing tags. Operates on last one.
  # trailing text (outside a tag) is operated on if existing
  #   <tag>text</tag>          => <tag>text.</tag>
  #   <tag>text</tag>more_text => '...more_text.'
  # if the text already ends in '.', '?' or '!', then no change
  #
  # array_or_string:: a String, or an Array of them (handled element-wise)
  #
  # Returns the punctuated String (or Array of Strings). The argument itself
  # is never modified: the period is added to a copy, so the plain-text branch
  # now behaves like the tag branch instead of appending to the caller's
  # string in place.
  def periodize(array_or_string)
    if array_or_string.is_a?(Array)
      return array_or_string.map { |st| periodize(st) }
    end

    st = array_or_string
    if st[-1,1] == '>'
      # (.*) is greedy, so the substitution lands on the LAST closing tag;
      # the period goes just inside it
      st.sub(/(.*)(<\/.*?>)/) do
        before, closing_tag = $1, $2
        if before =~ /[\.\?\!]\z/
          before + closing_tag
        else
          before + '.' + closing_tag
        end
      end
    elsif st =~ /[\.\?\!]\z/
      st
    else
      st + '.'
    end
  end

end
65
+
data/lib/ooffice.rb ADDED
@@ -0,0 +1,39 @@
1
+ require 'fileutils'
2
+ FU = FileUtils
3
+
4
class OpenOffice

  # Unzips the file, yields the text of its content.xml to the block and
  # writes whatever the block returns (preferably a string) back as the new
  # content.xml, then zips everything into a new file placed next to the
  # original.
  #
  # requires write access to the directory where the file is located
  # The next time you open the file, it will act like you've corrupted the
  # file (the character count is probably off, etc) just let it clean it up
  # for you!
  #
  # filename::     path to the document to read (it is left untouched)
  # new_basename:: base name of the new file (preferably <name>.odt)
  # blk::          receives the content xml as a String; its return value
  #                becomes the new content xml
  #
  # The work happens in a scratch directory "<basename>.unzip.tmp" beside the
  # document. A leftover one is deleted (with a warning) before starting, and
  # the directory is removed afterwards even when the block or an archiver
  # fails.
  #
  # Raises ArgumentError when no block is given and RuntimeError when unzip
  # or zip cannot be run successfully.
  def modify_content(filename, new_basename, &blk)
    # fail before touching the filesystem rather than after unpacking
    raise ArgumentError, "modify_content requires a block" unless blk

    content_file = 'content.xml'
    basename = File.basename(filename)
    tmpdir = basename + ".unzip.tmp"
    Dir.chdir(File.dirname(filename)) do
      if File.exist?(tmpdir)
        warn "#{tmpdir} already exists!"
        warn "deleting contents of #{tmpdir}"
        FileUtils.rm_rf(tmpdir)
      end
      FileUtils.mkpath(tmpdir)
      begin
        FileUtils.cp(basename, tmpdir)
        Dir.chdir(tmpdir) do
          run_archiver('unzip', '-q', basename)
          replace_with = blk.call(IO.read(content_file))
          File.open(content_file, 'w') {|fh| fh.print(replace_with) }
          # drop the copied archive so that it is not packed into the new one
          FileUtils.rm(basename, :force => true)
          run_archiver('zip', '-r', '-q', new_basename, *Dir["*"])
          FileUtils.mv new_basename, '..'
        end
      ensure
        FileUtils.rm_rf tmpdir
      end
    end
  end

  private

  # Runs an external command given as separate arguments. With more than one
  # argument Kernel#system does not go through a shell, so file names that
  # contain spaces or quotes are passed through unchanged.
  # Raises RuntimeError when the command fails or cannot be started.
  def run_archiver(*command)
    return if system(*command)
    raise "#{command.first} failed (#{command.join(' ')})"
  end

end
data/lib/pubmed.rb ADDED
@@ -0,0 +1,209 @@
1
+ require 'open-uri'
2
+ require 'rexml/document'
3
+ require 'iconv'
4
+ require 'citation'
5
+
6
# An article citation whose attributes are either supplied as a hash or
# fetched from PubMed (as XML, by pubmed id) and extracted from the record.
class PubMed < Citation::Article

  attr_accessor :pmid

  # pmid::       a pubmed id to fetch, OR a hash of attribute name => value.
  #              In a hash, the 'authors' entry may hold Citation::Author
  #              objects and/or strings (parsed with Citation::Author.from_s);
  #              every other entry is assigned through its "<key>=" writer.
  # identifier:: the citation identifier to use; when omitted (and a pmid was
  #              given) one is generated with create_id.
  #
  # Giving a pmid performs a network request (see get_xml).
  def initialize(pmid=nil, identifier=nil)
    @quotes = []
    @authors = []
    if pmid.is_a? Hash
      ########## THIS WHOLE MESS SHOULD BE ENCAPSULATED/INHERITED! but can't get
      #inheritance with authors= working for some reason
      pmid.each do |k,v|
        if k == 'authors'
          v.each do |auth|
            authors.push(auth.is_a?(String) ? Citation::Author.from_s(auth) : auth)
          end
        else
          send("#{k}=".to_sym, v)
        end
      end
      ############ <-- END MESS
    else
      @pmid = pmid
      @bibtype = :article
      extract_attrs_from_xml(get_xml(query_builder(pmid))) if pmid
      if identifier
        @ident = identifier
      elsif pmid
        @ident = create_id
      end
    end
  end


  # Fetches +query+ (a URL) and returns the response body as a String.
  # Nothing is rescued here: without a working connection the error raised
  # by open-uri propagates to the caller. The block form guarantees the
  # handle is closed even when reading fails.
  def get_xml(query)
    OpenURI.open_uri(query) {|handle| handle.read }
  end

  # first author's last name + year, with ALL whitespace removed
  # (e.g. a last name of "van der Berg" and year 2005 => "vanderBerg2005")
  def create_id
    (@authors[0].last.to_s + @year.to_s).gsub(/\s+/,'')
  end

  # Debugging representation: the scalar attributes followed by the authors.
  # The authors are listed once, at the end (they used to be printed twice
  # because the old filter compared strings against symbols).
  def inspect
    attrs = %w(ident quotes abstract journal_medline title year month vol issue pages bibtype)
    st = "<#{self.class}:##{self.__id__} "
    st << attrs.map {|v| ":#{v}=>#{send(v).inspect}" }.join(", ")
    st << " @authors=[#{authors.map{|g| g.inspect }.join(", ")}]"
    st << ">"
    st
  end


  private

  # Builds the query to ask for a citation given a pubmed id
  # valid types are xml, medline, (...need to figure out others)
  #
  # returns pubmed query based on pubmed id with xml as the output type.
  # Example of the older, html-embedded query:
  # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=xml&list_uids=14654843&query_hl=6
  # Example of the query used now:
  # http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=Pubmed&id=11283582&rettype=xml
  def query_builder(pmid)
    type = 'xml'
    #base_url = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?'
    base_url = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'
    cgi_params = ['db=Pubmed', "rettype=#{type}", 'retmode=text', "id=#{pmid}"].join('&')
    base_url + cgi_params
  end

  # get an xml element's text according to its path (assumes single element)
  #
  # Returns nil when nothing matches the path or the matched element has no
  # text. Raises RuntimeError when more than one element matches.
  def get_e_text(element, path)
    els = element.elements.to_a(path)
    raise "More than one #{path}!" if els.size > 1
    return nil if els.size == 0

    text_node = els[0].get_text
    text_node ? text_node.value : nil
  end

  # Text of every element matching +path+, joined with single spaces.
  # Returns '' when there is no match. Used for the abstract, which PubMed
  # may deliver as several AbstractText sections in one record.
  def get_joined_text(element, path)
    sections = element.elements.to_a(path).map do |el|
      text_node = el.get_text
      text_node ? text_node.value : nil
    end
    sections.compact.join(' ')
  end

  # Returns one Citation::Author (last name, initials) per Author element of
  # the record, in document order.
  def get_author_list(xml)
    auths = xml.elements.to_a("//PubmedArticle/MedlineCitation/Article/AuthorList/Author")
    auths.map do |auth|
      last_name = to_latin9(get_e_text(auth, "LastName"), "**BADCHARS**")
      initials = to_latin9(get_e_text(auth, "Initials"), "**BADINITS**")
      Citation::Author.new(last_name, initials)
    end
  end

  ## I think we are getting author names out in UTF-8 which is not being interpreted properly.
  ## Transform characters into something more standard, eh
  #
  # Converts +text+ from utf-8 to iso-8859-15; returns +placeholder+ when the
  # text contains a sequence that cannot be converted.
  # NOTE(review): Iconv left the standard library in Ruby 2.0; on newer
  # rubies this relies on the iconv gem being installed.
  def to_latin9(text, placeholder)
    Iconv.new('iso-8859-15', 'utf-8').iconv(text)
  rescue Iconv::IllegalSequence
    placeholder
  end

  # if they are not set from the xml, tries to set from hashes or raises a
  # RuntimeError
  #
  # journal_medline:: the medline journal abbreviation used as lookup key in
  #                   Journal::Medline_to_ISO and Journal::Medline_to_Full
  def set_journals_or_die(journal_medline)
    error_messages = []
    unless @journal_iso
      if Journal::Medline_to_ISO.key?(journal_medline)
        @journal_iso = Journal::Medline_to_ISO[journal_medline]
      else
        error_messages << "Expect key for '#{journal_medline}' in Journal::Medline_to_ISO"
        error_messages << "(alter file journal/medline_to_iso.yaml)"
      end
    end

    unless @journal_full
      if Journal::Medline_to_Full.key?(journal_medline)
        @journal_full = Journal::Medline_to_Full[journal_medline]
      else
        error_messages << "Expect key for '#{journal_medline}' in Journal::Medline_to_Full"
        error_messages << "(alter file journal/medline_to_full.yaml)"
      end
    end

    return if error_messages.empty?

    # frame the messages so that they stand out in terminal output
    label = "******************************************************************"
    framed = ['', label] + error_messages + ["Aborting!", label, '']
    raise(framed.join("\n"))
  end

  # Parses the PubMed xml and fills in title, journal names, authors,
  # volume, issue, year, month, pages and abstract.
  # Raises RuntimeError (via set_journals_or_die) when a journal name is
  # neither in the record nor in the lookup tables.
  def extract_attrs_from_xml(xml_string)
    xml = REXML::Document.new xml_string
    art = "//PubmedArticle/MedlineCitation/Article/"
    @title = get_e_text(xml, art + "ArticleTitle")
    @journal_medline = get_e_text(xml, "//PubmedArticle/MedlineCitation/MedlineJournalInfo/MedlineTA")
    @journal_full = get_e_text(xml, art + 'Journal/Title')
    @journal_iso = get_e_text(xml, art + 'Journal/ISOAbbreviation')
    set_journals_or_die(@journal_medline)

    @authors = get_author_list(xml)
    iss = art + "Journal/JournalIssue/"
    pdate = iss + "PubDate/"
    @vol = get_e_text(xml, iss + "Volume")
    @issue = get_e_text(xml, iss + "Issue")
    @year = get_e_text(xml, pdate + "Year")
    @month = get_e_text(xml, pdate + "Month")
    # no page numbers: the fallback marks the article as electronic-only
    @pages = get_e_text(xml, art + "Pagination/MedlinePgn") || '[Epub]'
    # an abstract split into several AbstractText sections is joined instead
    # of tripping get_e_text's "More than one" error
    @abstract = get_joined_text(xml, art + "Abstract/AbstractText")
  end

  # unnecessary now..
  # Pulls the xml out of the old html display page (the contents of its
  # <dd><pre> block), strips the font/bold tags and unescapes &lt; &gt; and
  # &quot;. Returns "" when the page has no such block.
  def pubmed_extract_xml_from_html(string)
    html = ""
    if string =~ /<dd><pre>(.*)<\/pre><\/dd>/m
      html = $1
      html.gsub!(/<\/?font.*?>/, '')
      html.gsub!(/<\/?b.*?>/, '')
      html.gsub!(/\&lt;/, '<')
      html.gsub!(/\&gt;/, '>')
      html.gsub!(/\&quot;/, '"')
    end
    html
  end


end
209
+