bivy 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. data/.gitignore +21 -0
  2. data/History +16 -0
  3. data/LICENSE +29 -0
  4. data/README.rdoc +37 -0
  5. data/Rakefile +43 -0
  6. data/TODO +12 -0
  7. data/VERSION +1 -0
  8. data/bin/bivy.rb +56 -0
  9. data/bin/pubmed_to_bivy.rb +78 -0
  10. data/doc/config.yaml +13 -0
  11. data/doc/src/default.css +126 -0
  12. data/doc/src/default.template +38 -0
  13. data/doc/src/tutorial/basic_flow.page +71 -0
  14. data/doc/src/tutorial/index.page +8 -0
  15. data/doc/src/tutorial/new_formats_and_media.page +83 -0
  16. data/jrn_abbrev/abbr_html.tgz +0 -0
  17. data/jrn_abbrev/abbr_to_journal.yaml +676 -0
  18. data/jrn_abbrev/download_abbrevs.rb +20 -0
  19. data/jrn_abbrev/for_ruby_class.rb +686 -0
  20. data/jrn_abbrev/html_to_yaml.rb +50 -0
  21. data/lib/bibliography.rb +144 -0
  22. data/lib/bivy.rb +4 -0
  23. data/lib/citation.rb +194 -0
  24. data/lib/format.rb +120 -0
  25. data/lib/format/acs.rb +88 -0
  26. data/lib/format/bioinformatics.rb +33 -0
  27. data/lib/format/bmc.rb +38 -0
  28. data/lib/format/jtp.rb +30 -0
  29. data/lib/format/mla.rb +50 -0
  30. data/lib/formatter.rb +276 -0
  31. data/lib/journal.rb +6 -0
  32. data/lib/journal/iso_to_full.yaml +1320 -0
  33. data/lib/journal/medline_to_full.yaml +7 -0
  34. data/lib/journal/medline_to_iso.yaml +45 -0
  35. data/lib/media.rb +88 -0
  36. data/lib/media/html.rb +65 -0
  37. data/lib/ooffice.rb +39 -0
  38. data/lib/pubmed.rb +209 -0
  39. data/lib/rtf.rb +217 -0
  40. data/old_stuff/old_list2refs.rb +103 -0
  41. data/old_stuff/pubmed2html.rb +119 -0
  42. data/old_stuff/pubmed_bib_write.rb +92 -0
  43. data/old_stuff/xml.tmp.xml +115 -0
  44. data/scripts/merge_bibs.rb +70 -0
  45. data/spec/bibliography_spec.rb +127 -0
  46. data/spec/citation_positions.odt +0 -0
  47. data/spec/formatter_spec.rb +14 -0
  48. data/spec/formatter_spec/cits_after.xml +2 -0
  49. data/spec/formatter_spec/cits_before.xml +2 -0
  50. data/spec/formatter_spec/content.xml +2 -0
  51. data/spec/ooffice_spec.rb +27 -0
  52. data/spec/pubmed_spec.rb +26 -0
  53. data/spec/spec_helper.rb +7 -0
  54. data/spec/testfiles/doc1.odt +0 -0
  55. metadata +136 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ Anal Chem: Analytical Chemistry
3
+ Mol Cell Proteomics: Molecular and Cellular Proteomics
4
+ Cell Signal: Cellular Signalling
5
+ J Proteome Res: Journal of Proteome Research
6
+ Nat Chem Biol: Nature Chemical Biology
7
+ Nat Biotechnol: Nature Biotechnology
@@ -0,0 +1,45 @@
1
+ ---
2
+ Nature: Nature
3
+ Biomed Mass Spectrom: Biomed. Mass Spectrom.
4
+ Nat Biotechnol: Nat. Biotechnol.
5
+ Anal. Chem.: Anal. Chem.
6
+ Nat Chem Biol: Nat. Chem. Biol.
7
+ J. Proteome Res.: J. Proteome Res.
8
+ Rapid Commun Mass Spectrom: Rapid Commun. Mass Spectrom.
9
+ KDD Workshop on Mining Temporal and Sequential Data: KDD Workshop MTSD
10
+ J Chromatogr B Analyt Technol Biomed Life Sci: J. Chromatogr., B
11
+ J. Chromatogr., A: J. Chromatogr., A
12
+ J Chemom: J. Chemom.
13
+ Genome Res: Genome Res.
14
+ Biomed. Mass Spectrom.: Biomed. Mass Spectrom.
15
+ SIAM J. Num. Anal.: SIAM J. Num. Anal.
16
+ Proteomics: Proteomics
17
+ J. Chemom.: J. Chemom.
18
+ Eur. Food Res. Technol.: Eur. Food Res. Technol.
19
+ Environ Sci Technol: Environ. Sci. Technol.
20
+ Analytica Chimica Acta: Anal. Chim. Acta
21
+ Nature Methods: Nature Methods
22
+ J Chromatogr A: J. Chromatogr., A
23
+ IEEE ASSP: IEEE ASSP
24
+ Nucleic Acids Res: Nucleic Acids Res.
25
+ Mol Cell Proteomics: Mol. Cell. Proteomics
26
+ J Proteome Res: J. Proteome Res.
27
+ J Mol Biol: J. Mol. Biol.
28
+ J. Am. Soc. Mass Spectrom.: J. Am. Soc. Mass Spectrom.
29
+ J. Mol. Biol.: J. Mol. Biol.
30
+ Bioinformatics: Bioinformatics
31
+ Anal Chem: Anal. Chem.
32
+ Mol Cell Proteomics: Mol. Cell. Proteomics
33
+ Cell Signal: Cell Signal.
34
+ Curr Opin Chem Biol: Curr. Opin. Chem. Biol.
35
+ Proc IEEE Comput Syst Bioinform Conf: Proc. IEEE Comput. Syst. Bioinform. Conf.
36
+ Lab Chip: Lab Chip
37
+ Clin J Oncol Nurs: Clin. J. Oncol. Nurs.
38
+ Mass Spectrom Rev: Mass Spectrom. Rev.
39
+ Nat Protoc: Nat. Protoc.
40
+ Conf Proc IEEE Eng Med Biol Soc: Conf. Proc. IEEE Eng. Med. Biol. Soc.
41
+ Curr Opin Oncol: Curr. Opin. Oncol.
42
+ J Mass Spectrom: J. Mass Spectrom.
43
+ Trends Pharmacol Sci: Trends Pharmacol. Sci.
44
+ Assay Drug Dev Technol: Assay Drug Dev. Technol.
45
+ Mol Pharmacol: Mol. Pharmacol.
data/lib/media.rb ADDED
@@ -0,0 +1,88 @@
1
+
2
# Shared markup helpers for bibliography output media.
#
# A concrete medium (for example Media::HTML) includes this module and may
# override #header, #footer, #italics, #bold, #underline and #format.  The
# implementations provided here emit inline-styled HTML <span> elements.
module Media
  # note that you need to add the shortcut to module Format::MediaForwarding
  # hash if you want to be able to access it!

  # add to this hash the conversion from the filename (as a symbol) to the
  # properly capitalized classname.  If the class name is just capitalized and
  # all lower case, not necessary.
  # (media files register themselves here when they are loaded, so the hash
  # must stay mutable)
  Symbol_to_class_string = { }
  #:html => 'HTML'

  # Loads "media/<tp>" and returns a new instance of the matching class.
  #
  # tp:: media name as a Symbol.  Defaults to :html, the only medium shipped
  #      next to this module; the former default (:jtp) names a citation
  #      format and has no media/jtp file, so the no-argument call could
  #      only fail with LoadError.
  #
  # Raises LoadError when media/<tp> cannot be required and NameError when
  # the file does not define the expected constant inside Media.
  def self.new(tp=:html)
    require "media/#{tp}"
    # the lookup must follow the require: the media file is what registers
    # any non-default capitalization in Symbol_to_class_string
    klass_st = Symbol_to_class_string[tp] || tp.to_s.capitalize
    Media.const_get(klass_st).new
  end

  # Text emitted before the bibliography (nothing unless overridden).
  def header
  end

  # Text emitted after the bibliography (nothing unless overridden).
  def footer
  end

  # Sends +method+ to self with +string+ converted via to_s.
  # Returns nil without calling anything when +string+ is nil or false.
  def call_it(method, string)
    return nil unless string
    send(method, string.to_s)
  end

  # Wraps +string+ (must be a String) in round brackets.
  def parenthesize(string)
    '(' + string + ')'
  end

  # Wraps +string+ (must be a String) in square brackets.
  def bracket(string)
    '[' + string + ']'
  end

  # nil-safe #bracket (see #call_it)
  def br(string)
    call_it(:bracket, string)
  end

  # nil-safe #parenthesize (see #call_it)
  def par(string)
    call_it(:parenthesize, string)
  end

  # italicize (nil-safe)
  def i(string)
    call_it(:italics, string)
  end

  # bold (nil-safe)
  def b(string)
    call_it(:bold, string)
  end

  # underline (nil-safe)
  def u(string)
    call_it(:underline, string)
  end

  # Returns +string+ inside an italic-styled span.
  def italics(string)
    "<span style=\"font-style:italic;\">" + string + "</span>"
  end

  # Returns +string+ inside a bold-styled span.
  def bold(string)
    "<span style=\"font-weight:bold;\">" + string + "</span>"
  end

  # Returns +string+ inside an underline-styled span.
  def underline(string)
    "<span style=\"text-decoration:underline;\">" + string + "</span>"
  end

  # create the final bibliography string in whatever media you desire
  # the example here is html
  #
  # format_object:: anything responding to format(citation) with a String
  # citations::     enumerable of citations
  #
  # Returns an ordered list (<ol>) with one <li> per citation.
  def format(format_object, citations)
    items = citations.map do |cit|
      " <li>" + format_object.format(cit) + "</li>"
    end
    "<ol>\n" + items.join("\n") + "\n</ol>\n"
  end

end
87
+
88
+
data/lib/media/html.rb ADDED
@@ -0,0 +1,65 @@
1
+
2
# HTML output medium: a minimal <html><body> wrapper around the bibliography,
# with text markup done through inline-styled <span> elements.
class Media::HTML
  include Media
  # 'html'.capitalize would give 'Html', so register the real class name for
  # the symbol-to-class lookup
  Media::Symbol_to_class_string[:html] = 'HTML'

  # Opening markup placed before the bibliography.
  def header
    "<html><body>"
  end

  # Closing markup placed after the bibliography.
  def footer
    "</body></html>"
  end

  # Returns +string+ inside an italic-styled span.
  def italics(string)
    "<span style=\"font-style:italic;\">" + string + "</span>"
  end

  # Returns +string+ inside a bold-styled span.
  def bold(string)
    "<span style=\"font-weight:bold;\">" + string + "</span>"
  end

  # Returns +string+ inside an underline-styled span.
  def underline(string)
    "<span style=\"text-decoration:underline;\">" + string + "</span>"
  end

  # Returns an ordered list (<ol>) holding one tab-indented <li> per
  # already-formatted citation string.
  def list(citations_as_strings)
    items = citations_as_strings.map {|cit| "\t<li>#{cit}</li>" }
    "<ol>\n" + items.join("\n") + "\n</ol>\n"
  end

  # expects opening and closing tags.  Operates on last one.
  # trailing text (outside a tag) is operated on if existing
  #     <tag>text</tag>           => <tag>text.</tag>
  #     <tag>text</tag>more_text  => '...more_text.'
  # if the text already ends in '.', '?' or '!', then no change
  # (TODO: should alias, really)
  #
  # array_or_string:: a String, or an Array whose elements are each
  #                   periodized (the result is then an Array)
  #
  # Always returns a new object; the argument is never modified, so frozen
  # strings are accepted.  A string that ends in '>' but contains no closing
  # tag is returned unchanged.
  def periodize(array_or_string)
    if array_or_string.is_a?(Array)
      return array_or_string.map {|st| periodize(st) }
    end

    st = array_or_string
    if st[-1,1] == '>'
      st.sub(/(.*)(<\/.*?>)/) do
        # copy the groups first: any further regexp match inside this block
        # replaces $~ and would leave $1/$2 nil
        before, closing_tag = $1, $2
        if before =~ /[\.\?\!]\z/
          before + closing_tag
        else
          before + '.' + closing_tag
        end
      end
    elsif st =~ /[\.\?\!]\z/
      st
    else
      st + '.'
    end
  end

end
65
+
data/lib/ooffice.rb ADDED
@@ -0,0 +1,39 @@
1
+ require 'fileutils'
2
+ FU = FileUtils
3
+
4
# Rewrites the content.xml stored inside an OpenOffice document by shelling
# out to the external `unzip` and `zip` programs.
class OpenOffice

  # unzips the file, gives a string of the content xml and will replace it
  # with whatever you pass back (preferably a string;)
  # requires write access to the directory where the file is located
  # The next time you open the file, it will act like you've corrupted the
  # file (the character count is probably off, etc) just let it clean it up
  # for you!
  #
  # filename::     path to the existing document
  # new_basename:: base name of the new file (preferably <name>.odt); it is
  #                created in the same directory as +filename+
  # blk::          receives the content.xml text, returns the replacement
  #
  # A scratch directory "<basename>.unzip.tmp" is created next to the
  # document and is removed again even when a step fails.
  #
  # Raises ArgumentError without a block, Errno::ENOENT when +filename+ is
  # not an existing file, and RuntimeError when unzip or zip does not
  # succeed.
  def modify_content(filename, new_basename, &blk)
    raise ArgumentError, "modify_content requires a block" unless blk
    # fail before anything is created on disk
    raise Errno::ENOENT, filename unless File.file?(filename)

    content_file = 'content.xml'
    basename = File.basename(filename)
    tmpdir = basename + ".unzip.tmp"
    Dir.chdir(File.dirname(filename)) do
      if File.exist?(tmpdir)
        warn "#{tmpdir} already exists!"
        warn "deleting contents of #{tmpdir}"
        FileUtils.rm_rf(tmpdir)
      end
      FileUtils.mkpath(tmpdir)
      begin
        FileUtils.cp(basename, tmpdir)
        Dir.chdir(tmpdir) do
          run_tool('unzip', '-q', basename)
          string = IO.read(content_file)
          replace_with = blk.call(string)
          File.open(content_file,'w') {|fh| fh.print(replace_with) }
          # the copied archive must not end up inside the new archive
          FileUtils.rm(basename, :force => true)
          to_include = Dir["*"]
          run_tool('zip', '-r', '-q', new_basename, *to_include)
          FileUtils.mv new_basename, '..'
        end
      ensure
        FileUtils.rm_rf tmpdir
      end
    end
  end

  private

  # Runs an external program with each argument passed as-is (no shell, so
  # spaces and quotes in file names are harmless).  Raises RuntimeError
  # when the program is missing or exits unsuccessfully.
  def run_tool(*argv)
    system(*argv) or raise "command failed: #{argv.join(' ')}"
  end

end
data/lib/pubmed.rb ADDED
@@ -0,0 +1,209 @@
1
+ require 'open-uri'
2
+ require 'rexml/document'
3
+ require 'iconv'
4
+ require 'citation'
5
+
6
+ # given the html page where the display is specified as xml
7
+ # extracts out the requested pieces
8
class PubMed < Citation::Article

  # the PubMed id this citation was fetched with (nil when built from a Hash)
  attr_accessor :pmid

  # pmid::       a PubMed id, nil, or a Hash of attribute values to set
  # identifier:: explicit citation identifier; when omitted and a pmid is
  #              given, one is derived with #create_id
  #
  # With a pmid the record is downloaded and parsed right away, so network
  # and XML errors surface from the constructor.
  def initialize(pmid=nil, identifier=nil)
    @quotes = []
    @authors = []
    if pmid.is_a? Hash
      ########## THIS WHOLE MESS SHOULD BE ENCAPSULATED/INHERITED! but can't get
      #inheritance with authors= working for some reason
      pmid.each do |k,v|
        if k == 'authors'
          v.each do |auth|
            authors.push( auth.is_a?(String) ? Citation::Author.from_s(auth) : auth )
          end
        else
          send("#{k}=".to_sym, v)
        end
      end
      ############ <-- END MESS
    else
      @pmid = pmid
      @bibtype = :article
      extract_attrs_from_xml(get_xml(query_builder(pmid))) if pmid
      if identifier
        @ident = identifier
      elsif pmid
        @ident = create_id
      end
    end
  end


  # Downloads +query+ (an http/https/ftp URL string) and returns the
  # response body.
  #
  # Nothing is rescued here: a missing connection or an HTTP error raises
  # whatever open-uri raises.  The URL is opened through URI rather than
  # Kernel#open so the string can never be taken for a local path or a
  # "|command" pipe, and the handle is closed even when reading fails.
  def get_xml(query)
    URI.parse(query).open {|handle| handle.read }
  end

  # first author's last name + year with all whitespace removed
  # (just the year when there is no author)
  def create_id
    first_author = @authors[0]
    last_name = first_author ? first_author.last.to_s : ''
    (last_name + @year.to_s).gsub(/\s+/,'')
  end

  # One-line summary of the scalar attributes followed by the author list.
  def inspect
    fields = %w(ident quotes abstract journal_medline title year month vol issue pages bibtype)
    st = "<#{self.class}:##{self.__id__} "
    st << fields.map {|v| ":#{v}=>#{send(v).inspect}" }.join(", ")
    # authors are shown once, here, rather than in the field list above
    st << " @authors=[#{authors.map{|g| g.inspect }.join(", ")}]"
    st << ">"
    st
  end


  private

  # Builds the query to ask for a citation given a pubmed id
  # valid types are xml, medline, (...need to figure out others)
  #
  # returns pubmed query based on pubmed id with xml as the output type.
  # Example (older interface, xml embedded in the page's html):
  # http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=xml&list_uids=14654843&query_hl=6
  #
  # http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=Pubmed&id=11283582&rettype=xml
  def query_builder(pmid)
    type = 'xml'
    #base_url = 'http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?'
    base_url = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'
    cgi_params = ['db=Pubmed', "rettype=#{type}", 'retmode=text', "id=#{pmid}"].join('&')
    base_url + cgi_params
  end

  # get an xml element's text according to its path (assumes single element)
  #
  # Returns nil when the path matches nothing or the element has no text;
  # raises RuntimeError when the path matches more than one element.
  def get_e_text(element, path)
    els = element.elements.to_a(path)
    raise "More than one #{path}!" if els.size > 1
    return nil if els.size == 0
    text = els[0].get_text
    text ? text.value : nil
  end

  # Returns one Citation::Author per <Author> element of the article.
  def get_author_list(xml)
    auths = xml.elements.to_a("//PubmedArticle/MedlineCitation/Article/AuthorList/Author")
    auths.map do |auth|
      ## I think we are getting author names out in UTF-8 which is not being interpreted properly.
      ## Transform characters into something more standard, eh
      last_name = to_latin9(get_e_text(auth, "LastName"), "**BADCHARS**")
      initials = to_latin9(get_e_text(auth, "Initials"), "**BADINITS**")
      Citation::Author.new(last_name, initials)
    end
  end

  # Converts UTF-8 +text+ to ISO-8859-15, giving +placeholder+ when Iconv
  # reports an illegal sequence.
  # NOTE(review): relies on the Iconv library required at the top of the
  # file; confirm it is available on the Ruby version in use.
  def to_latin9(text, placeholder)
    Iconv.new('iso-8859-15', 'utf-8').iconv(text)
  rescue Iconv::IllegalSequence
    placeholder
  end

  # if they are not set from the xml, tries to set from hashes or raises a
  # RuntimeError
  def set_journals_or_die(journal_medline)
    error_messages = []
    unless @journal_iso
      if Journal::Medline_to_ISO.key?(journal_medline)
        @journal_iso = Journal::Medline_to_ISO[journal_medline]
      else
        error_messages << "Expect key for '#{journal_medline}' in Journal::Medline_to_ISO"
        error_messages << "(alter file journal/medline_to_iso.yaml)"
      end
    end

    unless @journal_full
      if Journal::Medline_to_Full.key?(journal_medline)
        @journal_full = Journal::Medline_to_Full[journal_medline]
      else
        error_messages << "Expect key for '#{journal_medline}' in Journal::Medline_to_Full"
        error_messages << "(alter file journal/medline_to_full.yaml)"
      end
    end

    return if error_messages.empty?

    # frame the report so it stands out in terminal output
    label = "******************************************************************"
    raise((['', label] + error_messages + ["Aborting!", label, '']).join("\n"))
  end

  # Fills the citation attributes from an efetch XML response.
  # Pages default to '[Epub]' and the abstract to '' when absent.
  def extract_attrs_from_xml(xml_string)
    xml = REXML::Document.new xml_string
    art = "//PubmedArticle/MedlineCitation/Article/"
    @title = get_e_text(xml, art + "ArticleTitle")
    @journal_medline = get_e_text(xml, "//PubmedArticle/MedlineCitation/MedlineJournalInfo/MedlineTA")
    @journal_full = get_e_text(xml, art + 'Journal/Title')
    @journal_iso = get_e_text(xml, art + 'Journal/ISOAbbreviation')
    # fall back to the lookup tables for names the record did not carry
    set_journals_or_die(@journal_medline)

    @authors = get_author_list(xml)
    iss = art + "Journal/JournalIssue/"
    pdate = iss + "PubDate/"
    @vol = get_e_text(xml, iss + "Volume")
    @issue = get_e_text(xml, iss + "Issue")
    @year = get_e_text(xml, pdate + "Year")
    @month = get_e_text(xml, pdate + "Month")
    @pages = get_e_text(xml, art + "Pagination/MedlinePgn") || '[Epub]'
    @abstract = get_e_text(xml, art + "Abstract/AbstractText") || ''
  end

  # unnecessary now..
  # Pulls the escaped xml out of the <dd><pre> section of a result page,
  # dropping <font>/<b> tags and unescaping &lt; &gt; &quot;.
  # Returns "" when the section is not found.
  def pubmed_extract_xml_from_html(string)
    html = ""
    if string =~ /<dd><pre>(.*)<\/pre><\/dd>/m
      html = $1
      html.gsub!(/<\/?font.*?>/, '')
      html.gsub!(/<\/?b.*?>/, '')
      html.gsub!(/\&lt;/, '<')
      html.gsub!(/\&gt;/, '>')
      html.gsub!(/\&quot;/, '"')
    end
    html
  end


end
209
+