bivy 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/.gitignore +21 -0
  2. data/History +16 -0
  3. data/LICENSE +29 -0
  4. data/README.rdoc +37 -0
  5. data/Rakefile +43 -0
  6. data/TODO +12 -0
  7. data/VERSION +1 -0
  8. data/bin/bivy.rb +56 -0
  9. data/bin/pubmed_to_bivy.rb +78 -0
  10. data/doc/config.yaml +13 -0
  11. data/doc/src/default.css +126 -0
  12. data/doc/src/default.template +38 -0
  13. data/doc/src/tutorial/basic_flow.page +71 -0
  14. data/doc/src/tutorial/index.page +8 -0
  15. data/doc/src/tutorial/new_formats_and_media.page +83 -0
  16. data/jrn_abbrev/abbr_html.tgz +0 -0
  17. data/jrn_abbrev/abbr_to_journal.yaml +676 -0
  18. data/jrn_abbrev/download_abbrevs.rb +20 -0
  19. data/jrn_abbrev/for_ruby_class.rb +686 -0
  20. data/jrn_abbrev/html_to_yaml.rb +50 -0
  21. data/lib/bibliography.rb +144 -0
  22. data/lib/bivy.rb +4 -0
  23. data/lib/citation.rb +194 -0
  24. data/lib/format.rb +120 -0
  25. data/lib/format/acs.rb +88 -0
  26. data/lib/format/bioinformatics.rb +33 -0
  27. data/lib/format/bmc.rb +38 -0
  28. data/lib/format/jtp.rb +30 -0
  29. data/lib/format/mla.rb +50 -0
  30. data/lib/formatter.rb +276 -0
  31. data/lib/journal.rb +6 -0
  32. data/lib/journal/iso_to_full.yaml +1320 -0
  33. data/lib/journal/medline_to_full.yaml +7 -0
  34. data/lib/journal/medline_to_iso.yaml +45 -0
  35. data/lib/media.rb +88 -0
  36. data/lib/media/html.rb +65 -0
  37. data/lib/ooffice.rb +39 -0
  38. data/lib/pubmed.rb +209 -0
  39. data/lib/rtf.rb +217 -0
  40. data/old_stuff/old_list2refs.rb +103 -0
  41. data/old_stuff/pubmed2html.rb +119 -0
  42. data/old_stuff/pubmed_bib_write.rb +92 -0
  43. data/old_stuff/xml.tmp.xml +115 -0
  44. data/scripts/merge_bibs.rb +70 -0
  45. data/spec/bibliography_spec.rb +127 -0
  46. data/spec/citation_positions.odt +0 -0
  47. data/spec/formatter_spec.rb +14 -0
  48. data/spec/formatter_spec/cits_after.xml +2 -0
  49. data/spec/formatter_spec/cits_before.xml +2 -0
  50. data/spec/formatter_spec/content.xml +2 -0
  51. data/spec/ooffice_spec.rb +27 -0
  52. data/spec/pubmed_spec.rb +26 -0
  53. data/spec/spec_helper.rb +7 -0
  54. data/spec/testfiles/doc1.odt +0 -0
  55. metadata +136 -0
data/lib/format/acs.rb ADDED
@@ -0,0 +1,88 @@
1
+
2
+ # see: http://www.lib.berkeley.edu/CHEM/acsstyle.html
3
+
4
+ # Article:
5
+ # Basic Format:
6
+ # Author, A. A; Author, B. B; Author, C. C. Title of Article. Journal Abbreviation (italics) [Online if online] Year (boldface), Volume (italics), Pagination.
7
+
8
+ # Borman, S. Protein Sequencing For The Masses. Chem. Eng. News [Online] 2004, 82, pp 22-23.
9
+ # Slunt, K. M.; Giancarlo, L. C. Student-Centered Learning: A Comparison of Two Different Methods of Instruction. J. Chem. Educ. 2004, 81, pp 985-988.
10
+ # Takahaski, T. The Fate of Industrial Carbon Dioxide. Science [Online] 2004, 305, 352-353.
11
+ # {italics } {b} {i}
12
+ # (1) Washburn, M.P.; Wolters, D.; Yates, J.R. Nat. Biotechnol. 2001, 19, 242-7.
13
+ #
14
+ #
15
+ # Book with Author(s)
16
+
17
+ #Basic Format:
18
+ #Author, A. A.; Author, B. B. Book Title (italics), Edition (if any); Publisher: Place of Publication, Year; Pagination.
19
+ #
20
+ #Dill, K. A.; Bromberg, S. Molecular Driving Forces: Statistical Thermodynamics in Chemistry and Biology; Garland Science: New York, 2003.
21
+ #Engel, R; Cohen, J. I. Synthesis of Carbon-Phosphorus Bonds: New Methods of Exploration; CRC Press: Boca Raton, FL, 2004; pp 54-56.
22
+ #Zumdahl, S. S. Chemical Principles, 4th ed.; Houghton Mifflin: Boston, MA, 2002; p 7.
23
+
24
# ACS-style reference formatter.
#
# Relies on the helpers mixed in from Format (author_list, i, b, periodize
# and the citation field readers such as year, vol, pages, title, url).
# NOTE(review): i/b presumably mean italics/boldface, matching the ACS
# layout described in the file header -- confirm against Format.
class Format::ACS
  include Format
  Format::Symbol_to_class_string[:acs] = 'ACS'

  # Earlier implementations, kept for reference only (not executed):
  #
  #   def article(cit)
  #     [author_list('.', ', ', '; '), i(journal_iso), b(year), [vol, pages].compact.join(', ')]
  #   end
  #
  #   # needs to deal with journal or no journal
  #   def article_to_be_submitted(cit)
  #     [author_list('.'), i(journal_iso), i('Article to be submitted')]
  #   end
  #
  #   def webpage(cit)
  #     [periodize(b(title)), u(url), periodize("#{b(year)}, #{month} #{day}")].compact.join(' ')
  #   end
  #
  #   # TODO: book

  # Journal article.
  #
  # Returns a three-element Array: the author list, the journal
  # abbreviation, and a "year, volume, pages" string in which nil parts
  # are dropped before joining.
  def article
    [author_list('.'), i(journal_iso), [b(year), i(vol), pages].compact.join(', ')]
  end

  # Article that has not been submitted yet.
  #
  # Returns a three-element Array: author list, journal abbreviation and
  # the fixed note 'Article to be submitted'.
  def article_to_be_submitted
    [author_list('.'), i(journal_iso), i('Article to be submitted')]
  end

  # Webpage, e.g.:
  #   ChemFinder.Com. http://chemfinder.cambridgesoft.com (accessed July 14, 2004).
  #
  # The "(accessed ...)" part is only emitted when a year is present; the
  # day is only used when a month is present as well.
  # Returns a String ending in '.'.
  def webpage
    accessed_string = nil
    if year
      date_prefix =
        if month
          day ? "#{month} #{day}, " : "#{month} "
        else
          ''
        end
      accessed_string = "(accessed #{date_prefix}#{year})"
    end

    [periodize(title), url, accessed_string].compact.join(' ') << '.'
  end

  # TODO: book

end
86
+
87
+
88
+
@@ -0,0 +1,33 @@
1
+
2
+ # authors(initialed,comma'd,no spaces, using et al for 3 or more) (2007) *title* i(Journal_iso) *year*, i(vol(issue)), pages.
3
+ # authors(initialed) \(year\). "title" u(Journal_medline) *vol*(issue): pages.
4
# Reference formatter for the journal Bioinformatics.
#
# Relies on the helpers mixed in from Format (author_list, periodize, par,
# i, b and the citation field readers).
class Format::Bioinformatics
  include Format

  # Journal article.
  #
  # The space-joined lead (authors, year, title, journal) is followed by
  # the volume and the pages, comma-separated; nil parts are dropped.
  # Returns a String ending in '.'.
  def article
    lead = [
      periodize(author_list('.', ',', nil)),
      par(year),
      periodize(title),
      i(journal_iso)
    ].compact.join(' ')
    [lead, b(vol), pages].compact.join(', ') << '.'
  end

  #def article_to_be_submitted
  #  #[periodize(author_list('.')), periodize(par(year)), "\"#{periodize(title)}\"", i(journal_medline), b(vol), par(issue), ": #{pages}"].compact.join(' ')
  #  [author_list('.'), "\"#{periodize(title)}\"", u(journal_medline), i('Article to be submitted')].compact.join(' ') << '.'
  #end

  # Article currently under review: same lead as #article, followed by
  # the note 'manuscript in review'.
  # Returns a String ending in '.'.
  def article_in_review
    lead = [
      periodize(author_list('.', ',', nil)),
      par(year),
      periodize(title),
      i(journal_iso)
    ].compact.join(' ')
    lead + ', ' + i('manuscript in review') + '.'
  end

  # Webpages are not expected in the references of this format, so this
  # terminates the program with a message.
  def webpage
    abort 'shouldnt be webpages in Bioinformatics journals!'
  end

  #def workshop
  #  [periodize(author_list('.')), periodize(par(year)), "\"#{periodize(title)}\"", u(name), "#{pages}"].compact.join(' ') << '.'
  #end

  # TODO: book

end
32
+
33
+
data/lib/format/bmc.rb ADDED
@@ -0,0 +1,38 @@
1
+
2
# BMC-style reference formatter.
#
# Relies on the helpers mixed in from Format (author_list, periodize, b, i,
# br and the citation field readers).
class Format::BMC
  include Format
  Format::Symbol_to_class_string[:bmc] = 'BMC'

  # Journal article.
  #
  # A missing issue contributes nothing; a missing volume is passed to b()
  # as an empty string. Returns a single String ending in '.'.
  def article
    issue_string = issue.nil? ? '' : "(#{issue})"
    vol_string = vol.nil? ? '' : vol

    # This is the original:
    "#{author_list('')}: #{b(periodize(title))} #{i(journal_medline)} #{year}, #{b(vol_string)}#{issue_string}:#{pages_full}."
    # This is NOT the BMC format:
    # "#{author_list('.', ' ', ', ', '&', true)} (#{year}) #{title.gsub(/\.$/,'')}, #{periodize(i(journal_iso))} #{i(vol_string)}, #{pages}."
  end

  # Webpage, e.g.:
  #   b(Webpage Name) [http://chemfinder.cambridgesoft.com]
  #
  # Returns the title and the url, space-separated, with nil parts dropped.
  def webpage
    parts = [b(title), br(url)]
    parts.compact.join(' ')
  end

  # TODO: book

end
36
+
37
+
38
+
data/lib/format/jtp.rb ADDED
@@ -0,0 +1,30 @@
1
+
2
+ # authors(initialed) *title* i(Journal_iso) *year*, i(vol(issue)), pages.
3
# JTP-style reference formatter.
#
# Relies on the helpers mixed in from Format (author_list, periodize, par,
# b, i, u and the citation field readers).
class Format::JTP
  include Format
  Format::Symbol_to_class_string[:jtp] = 'JTP'

  # Journal article.
  #
  # Returns a four-element Array: author list, title, journal abbreviation
  # and a "year, vol(issue), pages" string with nil parts dropped. The
  # vol(issue) part is omitted entirely when there is no volume.
  def article
    vol_issue = vol ? "#{vol}#{par(issue)}" : nil
    tail = [b(year), vol_issue, pages].compact.join(', ')
    [author_list('.'), b(title), i(journal_iso), tail]
  end

  # Article that has not been submitted yet.
  #
  # Returns a three-element Array: author list, journal abbreviation and
  # the fixed note 'Article to be submitted'.
  def article_to_be_submitted
    [author_list('.'), i(journal_iso), i('Article to be submitted')]
  end

  # Webpage: title, url and a "year, month day" string, space-separated.
  def webpage
    heading = periodize(b(title))
    link = u(url)
    date = periodize("#{b(year)}, #{month} #{day}")
    [heading, link, date].compact.join(' ')
  end

  # TODO: book

end
29
+
30
+
data/lib/format/mla.rb ADDED
@@ -0,0 +1,50 @@
1
+
2
+ # authors(initialed) *title* i(Journal_iso) *year*, i(vol(issue)), pages.
3
+ # authors(initialed) \(year\). "title" u(Journal_medline) *vol*(issue): pages.
4
# MLA-style reference formatter.
#
# Relies on the helpers mixed in from Format (author_list, periodize, par,
# b, i, u and the citation field readers).
class Format::MLA
  include Format
  Format::Symbol_to_class_string[:mla] = 'MLA'

  # Journal article:
  #   authors (year). "title" journal vol(issue): pages.
  #
  # Returns a String ending in '.'.
  def article
    vol_issue = vol ? "#{b(vol)}#{par(issue)}" : nil
    parts = [
      periodize(author_list('.')),
      periodize(par(year)),
      "\"#{periodize(title)}\"",
      u(journal_medline),
      "#{vol_issue}: #{pages}"
    ]
    parts.compact.join(' ') << '.'
  end

  # Article that has not been submitted yet. Returns a String ending in '.'.
  def article_to_be_submitted
    #[periodize(author_list('.')), periodize(par(year)), "\"#{periodize(title)}\"", i(journal_medline), b(vol), par(issue), ": #{pages}"].compact.join(' ')
    parts = [
      author_list('.'),
      "\"#{periodize(title)}\"",
      u(journal_medline),
      i('Article to be submitted')
    ]
    parts.compact.join(' ') << '.'
  end

  # Webpage: title, url and an optional "(accessed month day, year)" note.
  #
  # The note is only emitted when a year is present; the day is only used
  # when a month is present as well. Returns a String ending in '.'.
  def webpage
    accessed_string =
      if year
        note = '(accessed '
        note << (day ? "#{month} #{day}, " : "#{month} ") if month
        note << year.to_s << ')'
      end

    [periodize(title), url, accessed_string].compact.join(' ') << '.'
  end

  # Workshop contribution. Returns a String ending in '.'.
  def workshop
    parts = [
      periodize(author_list('.')),
      periodize(par(year)),
      "\"#{periodize(title)}\"",
      u(name),
      "#{pages}"
    ]
    parts.compact.join(' ') << '.'
  end

  # TODO: book

end
49
+
50
+
data/lib/formatter.rb ADDED
@@ -0,0 +1,276 @@
1
+ require 'citation'
2
+ require 'pubmed'
3
+ require 'set'
4
+ require 'ooffice'
5
+
6
# Replaces in-text citation markers (by default "#[id1,id2]") in a document
# with numeric or MLA-style (author, year) citations and selects/orders the
# matching bibliography entries.
class Formatter

  FindStartCitation = '#['
  FindEndCitation = ']'
  SplitCitations = ','
  StartNumbering = 1
  ReplaceWith = 'X'

  # Number of citations already sharing an author/year key => year suffix
  # letter for the entry being added. Kept so existing references to the
  # constant keep working; the suffix is now computed on the fly (see
  # #disambiguation_letter), which also removes the 10-entry limit.
  ArSizeToLetter = {
    1 => 'b',
    2 => 'c',
    3 => 'd',
    4 => 'e',
    5 => 'f',
    6 => 'g',
    7 => 'h',
    8 => 'i',
    9 => 'j',
    10 => 'k',
  }

  # Rewrites the citations of +document+ (only .odt files currently) into a
  # new file whose name has '.odt' replaced by '.cit.odt'.
  #
  # document:: path of the document to read
  # biblio::   a yaml bibliography, loaded with Bibliography.from_yaml
  # options::  :bib => :mla selects author/year citations, anything else
  #            selects numeric citations; :bracket => true wraps numbers
  #            as ' [N]'; :cit_pos => :before / :after repositions the
  #            citations relative to '.' and ',' first.
  #
  # Returns the bibliography restricted (via select_by_id!) to the cited
  # ids. Aborts the program if no citation ids could be collected.
  def create_bibliography(document, biblio, options={})
    bib = Bibliography.from_yaml(biblio)
    new_document = document.sub(/\.odt$/, '.cit.odt')

    ordered_cit_ids = nil
    OpenOffice.new.modify_content(document, new_document) do |xml|
      if (cp = options[:cit_pos])
        xml = reposition_citations(xml, cp)
      end
      new_xml, ordered_cit_ids =
        if options[:bib] == :mla
          # this may actually change the year on some bibs
          replace_citations_mla(xml, bib)
        elsif options[:bracket]
          replace_citations_numerically(xml, FindStartCitation, FindEndCitation, SplitCitations, StartNumbering, ' [X]')
        else
          replace_citations_numerically(xml)
        end
      new_xml
    end
    abort "Couldn't get citations" unless ordered_cit_ids
    bib.select_by_id!(ordered_cit_ids)
    bib
  end

  # Moves every citation that touches a '.' or ',' to the requested side
  # of that punctuation mark.
  #
  # position:: :before or :after (a String is accepted as well)
  #
  #   :after   search<text:span text:style-name="T29">#[Keller2002]</text:span>,
  #        --> search,<text:span text:style-name="T29">#[Keller2002]</text:span>
  #            human samples#[Qian2005].
  #        --> human samples.#[Qian2005]
  #   :before  is the reverse transformation.
  #
  # Returns the new xml string. Raises ArgumentError for any other
  # position (previously nil was returned silently, which only failed
  # later when the nil was used as the document content).
  def reposition_citations(xml, position)
    case position.to_s
    when 'after'
      # bare citation first, then citation wrapped in an element
      xml.gsub(/(#\[.*?\])([\.\,])/, '\2\1').
        gsub(/(<[^\/][^<]+>)(#\[.*?\])(<\/.*?>)([\.\,])/, '\4\1\2\3')
    when 'before'
      xml.gsub(/([\.\,])(#\[.*?\])/, '\2\1').
        gsub(/([\.\,])(<[^\/][^<]+>)(#\[.*?\])(<\/.*?>)/, '\2\3\4\1')
    else
      raise ArgumentError, "position must be :before or :after (got #{position.inspect})"
    end
  end

  # Builds the in-text MLA reference for a citation.
  #
  # Webpages are referenced by title. Otherwise the result is
  # "<authors>, <year>": the first whitespace-separated word when authors
  # is a String, else (assuming an array of authors responding to #last)
  # 'A', 'A and B' or 'A et al.'.
  def to_mla_ref(citation)
    return citation.title if citation.bibtype == :webpage

    authors = citation.authors
    author_bit =
      if authors.is_a? String # just use first listed thing for now
        authors.split(/\s+/).first
      else # assuming array
        case authors.size
        when 1
          authors[0].last
        when 2
          authors[0].last + ' and ' + authors[1].last
          #authors[0].last + ' &amp; ' + authors[1].last ## using an ampersand
        else
          authors[0].last + ' et al.'
        end
      end
    author_bit + ', ' + "#{citation.year}"
  end

  # Replaces every citation group in +string+ with an MLA (author, year)
  # citation built from the citations of +bib+.
  #
  # Citations that share the same author/year reference are told apart by
  # a letter appended to the year (2004a, 2004b, ...).
  # WARNING: that letter is appended in place to the citation's year
  # (via String#<<), so the bibliography entries themselves are changed.
  #
  # order:: :alphabetical (default) returns the ids sorted by their
  #         (case-insensitive) MLA reference; :as_is returns them in the
  #         order in which they are first cited.
  #
  # Returns [new_string, ordered_citation_ids]. Aborts the program when a
  # cited id is not present in the bibliography.
  def replace_citations_mla(string, bib, start_citation='#[', end_citation=']', split_citations=',', order=:alphabetical)
    citation_regex = citation_group_regex(start_citation, end_citation)
    id_to_cit = {}
    bib.citations.each do |cit|
      id_to_cit[cit.ident] = cit
    end

    # undecorated mla_ref => citations sharing it, in order of appearance
    mla_ref_to_cits = Hash.new {|h,k| h[k] = [] }
    id_to_mla_ref = {} # from the citation id to its final mla_ref
    ids_as_cited = []  # ids in order of first appearance

    # First pass: find author/year combos that are duplicated.
    string.scan(citation_regex) do |captures|
      split_refs(captures.first, split_citations).each do |ref|
        unless id_to_cit.key?(ref)
          banner = '*************************************'
          abort [banner, "* No citation for: #{ref}", banner].join("\n")
        end
        # Already handled. This check must come before to_mla_ref: once a
        # year has received its letter the reference would no longer match
        # its original key and the citation would be listed twice.
        next if id_to_mla_ref.key?(ref)

        citation = id_to_cit[ref]
        ids_as_cited << ref
        mla_ref = to_mla_ref(citation)
        sharing = mla_ref_to_cits[mla_ref]
        if sharing.empty? # first mla_ref of its kind
          id_to_mla_ref[ref] = mla_ref
        else # conflict
          if sharing.size == 1
            # the first citation only becomes 'a' once a second one shows up
            first_cit = sharing.first
            id_to_mla_ref[first_cit.ident] = mla_ref + 'a'
            first_cit.year << 'a'
          end
          letter = disambiguation_letter(sharing.size)
          id_to_mla_ref[ref] = mla_ref + letter
          citation.year << letter
        end
        sharing.push(citation)
      end
    end

    # Second pass: create the actual citations with the final mla_refs.
    new_string = string.gsub(citation_regex) do
      refs = split_refs(Regexp.last_match(1), split_citations)
      to_mla_citation_string(refs.map {|ref| id_to_mla_ref[ref] })
    end

    ordered_cit_ids =
      if order == :as_is
        ids_as_cited
      else
        # citations under one key are already in a, b, c... order
        ids = []
        mla_ref_to_cits.keys.sort_by {|k| k.downcase }.each do |mla_ref|
          ids.concat(mla_ref_to_cits[mla_ref].map {|cit| cit.ident })
        end
        ids
      end
    [new_string, ordered_cit_ids]
  end

  # Takes an array of mla_refs and returns the final in-text citation,
  # e.g. " (Smith, 2004a; Jones, 2001)" (note the leading space).
  def to_mla_citation_string(mla_refs)
    " (#{mla_refs.join('; ')})"
  end

  # Replaces every citation group in +string+ with citation numbers.
  #
  # Numbers are handed out in order of first appearance, starting at
  # start_numbering. Ids inside a group are separated by split_citations;
  # surrounding whitespace is ignored and empty ids are skipped.
  #
  # replace_with gives the formatting of the replacement: the 'X' stands
  # for the (possibly collapsed, see #cit_string) numbers, so 'X' is just
  # the number and '[X]' is a bracketed number.
  #
  # Returns [new_string, ordered_ids] where ordered_ids lists the cited
  # ids by ascending citation number.
  def replace_citations_numerically(string, find_start_citation=FindStartCitation, find_end_citation=FindEndCitation, split_citations=SplitCitations, start_numbering=StartNumbering, replace_with=ReplaceWith)
    (before_x, after_x) = replace_with.split('X')
    before_x ||= ''
    after_x ||= ''
    cits = {}
    citation_regex = citation_group_regex(find_start_citation, find_end_citation)

    ref_cnt = start_numbering
    new_string = string.gsub(citation_regex) do
      cit_list = split_refs(Regexp.last_match(1), split_citations).map do |ref|
        unless cits.key? ref
          cits[ref] = ref_cnt
          ref_cnt += 1
        end
        cits[ref] # <- no formatting at this point
      end
      before_x + cit_string(cit_list) + after_x
    end
    ordered_cits = cits.keys.sort_by {|ref| cits[ref] }
    [new_string, ordered_cits]
  end

  # Given an array of citation numbers, generates the string for their
  # citation: numbers are sorted, duplicates dropped and runs of two or
  # more consecutive numbers collapsed to 'first-last'.
  #
  # e.g. if [10, 11, 12], string is '10-12'
  # e.g. if [4, 13, 12, 17] string is '4,12-13,17'
  #
  # The argument is not modified. Returns '' for an empty array.
  def cit_string(cit_num_array)
    runs = [] # [first, last] pairs of consecutive numbers
    cit_num_array.uniq.sort.each do |num|
      if runs.last && runs.last[1] + 1 == num
        runs.last[1] = num
      else
        runs << [num, num]
      end
    end
    runs.map {|first, last| first == last ? first.to_s : "#{first}-#{last}" }.join(',')
  end

  private

  # Regexp matching one citation group; capture 1 is the text between the
  # (literal) start and end markers.
  def citation_group_regex(start_citation, end_citation)
    Regexp.new(Regexp.escape(start_citation) + '(.*?)' + Regexp.escape(end_citation))
  end

  # Splits the inside of a citation group into ids, ignoring whitespace
  # around each id and dropping empty ids.
  def split_refs(group, split_citations)
    group.split(split_citations).map {|ref| ref.strip }.reject {|ref| ref.empty? }
  end

  # Year suffix for a citation added when +already_present+ citations
  # share its reference: 1 => 'b', 2 => 'c', ... (continues past 'z' the
  # way String#succ does).
  def disambiguation_letter(already_present)
    letter = 'a'
    already_present.times { letter = letter.succ }
    letter
  end

end