bivy 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. data/.gitignore +21 -0
  2. data/History +16 -0
  3. data/LICENSE +29 -0
  4. data/README.rdoc +37 -0
  5. data/Rakefile +43 -0
  6. data/TODO +12 -0
  7. data/VERSION +1 -0
  8. data/bin/bivy.rb +56 -0
  9. data/bin/pubmed_to_bivy.rb +78 -0
  10. data/doc/config.yaml +13 -0
  11. data/doc/src/default.css +126 -0
  12. data/doc/src/default.template +38 -0
  13. data/doc/src/tutorial/basic_flow.page +71 -0
  14. data/doc/src/tutorial/index.page +8 -0
  15. data/doc/src/tutorial/new_formats_and_media.page +83 -0
  16. data/jrn_abbrev/abbr_html.tgz +0 -0
  17. data/jrn_abbrev/abbr_to_journal.yaml +676 -0
  18. data/jrn_abbrev/download_abbrevs.rb +20 -0
  19. data/jrn_abbrev/for_ruby_class.rb +686 -0
  20. data/jrn_abbrev/html_to_yaml.rb +50 -0
  21. data/lib/bibliography.rb +144 -0
  22. data/lib/bivy.rb +4 -0
  23. data/lib/citation.rb +194 -0
  24. data/lib/format.rb +120 -0
  25. data/lib/format/acs.rb +88 -0
  26. data/lib/format/bioinformatics.rb +33 -0
  27. data/lib/format/bmc.rb +38 -0
  28. data/lib/format/jtp.rb +30 -0
  29. data/lib/format/mla.rb +50 -0
  30. data/lib/formatter.rb +276 -0
  31. data/lib/journal.rb +6 -0
  32. data/lib/journal/iso_to_full.yaml +1320 -0
  33. data/lib/journal/medline_to_full.yaml +7 -0
  34. data/lib/journal/medline_to_iso.yaml +45 -0
  35. data/lib/media.rb +88 -0
  36. data/lib/media/html.rb +65 -0
  37. data/lib/ooffice.rb +39 -0
  38. data/lib/pubmed.rb +209 -0
  39. data/lib/rtf.rb +217 -0
  40. data/old_stuff/old_list2refs.rb +103 -0
  41. data/old_stuff/pubmed2html.rb +119 -0
  42. data/old_stuff/pubmed_bib_write.rb +92 -0
  43. data/old_stuff/xml.tmp.xml +115 -0
  44. data/scripts/merge_bibs.rb +70 -0
  45. data/spec/bibliography_spec.rb +127 -0
  46. data/spec/citation_positions.odt +0 -0
  47. data/spec/formatter_spec.rb +14 -0
  48. data/spec/formatter_spec/cits_after.xml +2 -0
  49. data/spec/formatter_spec/cits_before.xml +2 -0
  50. data/spec/formatter_spec/content.xml +2 -0
  51. data/spec/ooffice_spec.rb +27 -0
  52. data/spec/pubmed_spec.rb +26 -0
  53. data/spec/spec_helper.rb +7 -0
  54. data/spec/testfiles/doc1.odt +0 -0
  55. metadata +136 -0
data/lib/format/acs.rb ADDED
@@ -0,0 +1,88 @@
1
+
2
+ # see: http://www.lib.berkeley.edu/CHEM/acsstyle.html
3
+
4
+ # Article:
5
+ # Basic Format:
6
+ # Author, A. A; Author, B. B; Author, C. C. Title of Article. Journal Abbreviation (italics) [Online if online] Year (boldface), Volume (italics), Pagination.
7
+
8
+ # Borman, S. Protein Sequencing For The Masses. Chem. Eng. News [Online] 2004, 82, pp 22-23.
9
+ # Slunt, K. M.; Giancarlo, L. C. Student-Centered Learning: A Comparison of Two Different Methods of Instruction. J. Chem. Educ. 2004, 81, pp 985-988.
10
+ # Takahaski, T. The Fate of Industrial Carbon Dioxide. Science [Online] 2004, 305, 352-353.
11
+ # {italics } {b} {i}
12
+ # (1) Washburn, M.P.; Wolters, D.; Yates, J.R. Nat. Biotechnol. 2001, 19, 242-7.
13
+ #
14
+ #
15
+ # Book with Author(s)
16
+
17
+ #Basic Format:
18
+ #Author, A. A.; Author, B. B. Book Title (italics), Edition (if any); Publisher: Place of Publication, Year; Pagination.
19
+ #
20
+ #Dill, K. A.; Bromberg, S. Molecular Driving Forces: Statistical Thermodynamics in Chemistry and Biology; Garland Science: New York, 2003.
21
+ #Engel, R; Cohen, J. I. Synthesis of Carbon-Phosphorus Bonds: New Methods of Exploration; CRC Press: Boca Raton, FL, 2004; pp 54-56.
22
+ #Zumdahl, S. S. Chemical Principles, 4th ed.; Houghton Mifflin: Boston, MA, 2002; p 7.
23
+
24
# ACS (American Chemical Society) citation style.
#
# Field readers (vol, year, pages, title, url, month, day, journal_iso) and
# markup helpers (i, b, periodize, author_list) are not defined in this class;
# presumably they come from the Format mixin -- confirm against lib/format.rb.
class Format::ACS
  include Format
  Format::Symbol_to_class_string[:acs] = 'ACS'

  # Earlier draft of the entry builders (they took the citation as an
  # argument).  Kept for reference only; none of this is executed.
  #
  #   def article(cit)
  #     [author_list('.', ', ', '; '), i(journal_iso), b(year), [vol, pages].compact.join(', ')]
  #   end
  #
  #   # needs to deal with journal or no journal
  #   def article_to_be_submitted(cit)
  #     [author_list('.'), i(journal_iso), i('Article to be submitted')]
  #   end
  #
  #   def webpage(cit)
  #     [periodize(b(title)), u(url), periodize("#{b(year)}, #{month} #{day}")].compact.join(' ')
  #   end
  #
  #   # TODO: book

  # Journal article, e.g.
  #   Washburn, M.P.; Wolters, D.; Yates, J.R. Nat. Biotechnol. 2001, 19, 242-7.
  #
  # Returns a three element array: the author list, the italicised ISO
  # journal name, and "year, volume, pages" joined with ', ' (nil parts are
  # dropped from that last group).
  def article
    [author_list('.'), i(journal_iso), [b(year), i(vol), pages].compact.join(', ')]
  end

  # Article that has not been submitted yet.
  #
  # Returns a three element array: author list, italicised ISO journal name
  # and the italicised literal 'Article to be submitted'.
  def article_to_be_submitted
    [author_list('.'), i(journal_iso), i('Article to be submitted')]
  end

  # Webpage, e.g.
  #   ChemFinder.Com. http://chemfinder.cambridgesoft.com (accessed July 14, 2004).
  #
  # The "(accessed ...)" part is only emitted when a year is present; month
  # and day are included when available.  Returns a single String ending
  # in '.'.
  def webpage
    accessed_string =
      if year
        date_prefix = ''
        if month
          # "July 14, " when the day is known, otherwise just "July "
          date_prefix = day ? "#{month} #{day}, " : "#{month} "
        end
        "(accessed #{date_prefix}#{year})"
      end

    [periodize(title), url, accessed_string].compact.join(' ') << '.'
  end

  # TODO: book

end
86
+
87
+
88
+
@@ -0,0 +1,33 @@
1
+
2
+ # authors(initialed,comma'd,no spaces, using et al for 3 or more) (2007) *title* i(Journal_iso) *year*, i(vol(issue)), pages.
3
+ # authors(initialed) \(year\). "title" u(Journal_medline) *vol*(issue): pages.
4
# Citation style used by the journal Bioinformatics.
#
# Field readers and markup helpers (author_list, periodize, par, i, b, ...)
# are not defined here; presumably they come from the Format mixin.
class Format::Bioinformatics
  include Format

  # Journal article: "Authors. (year) Title. Journal, vol, pages."
  # Returns a single String ending in '.'; nil parts are skipped.
  def article
    lead = [
      periodize(author_list('.', ',', nil)),
      par(year),
      periodize(title),
      i(journal_iso)
    ].compact.join(' ')
    pieces = [lead, b(vol), pages].compact
    pieces.join(', ') << '.'
  end

  #def article_to_be_submitted
  #  #[periodize(author_list('.')), periodize(par(year)), "\"#{periodize(title)}\"", i(journal_medline), b(vol), par(issue), ": #{pages}"].compact.join(' ')
  #  [author_list('.'), "\"#{periodize(title)}\"", u(journal_medline), i('Article to be submitted')].compact.join(' ') << '.'
  #end

  # Article currently under review: same lead as #article, followed by the
  # italicised literal 'manuscript in review'.  Returns a single String
  # ending in '.'.
  def article_in_review
    lead = [
      periodize(author_list('.', ',', nil)),
      par(year),
      periodize(title),
      i(journal_iso)
    ].compact.join(' ')
    (lead + ', ' + i('manuscript in review')) << '.'
  end

  # Webpages are not expected in the references of this style, so asking
  # for one terminates the program via Kernel#abort.
  def webpage
    abort 'shouldnt be webpages in Bioinformatics journals!'
  end

  #def workshop
  #  [periodize(author_list('.')), periodize(par(year)), "\"#{periodize(title)}\"", u(name), "#{pages}"].compact.join(' ') << '.'
  #end

  # TODO: book

end
32
+
33
+
data/lib/format/bmc.rb ADDED
@@ -0,0 +1,38 @@
1
+
2
# BMC (BioMed Central) citation style.
#
# Field readers and markup helpers (author_list, periodize, b, i, br, ...)
# are not defined here; presumably they come from the Format mixin.
class Format::BMC
  include Format
  Format::Symbol_to_class_string[:bmc] = 'BMC'

  # Journal article:
  #   "Authors: <b>Title.</b> <i>Journal</i> year, <b>vol</b>(issue):pages."
  # A missing issue or volume contributes an empty string.  Returns a String.
  def article
    issue_string = issue.nil? ? '' : "(#{issue})"
    vol_string = vol.nil? ? '' : vol

    # This is the original:
    "#{author_list('')}: #{b(periodize(title))} #{i(journal_medline)} #{year}, #{b(vol_string)}#{issue_string}:#{pages_full}."
    # This is NOT the BMC format:
    # "#{author_list('.', ' ', ', ', '&', true)} (#{year}) #{title.gsub(/\.$/,'')}, #{periodize(i(journal_iso))} #{i(vol_string)}, #{pages}."
  end

  # Webpage, e.g.
  #   b(Webpage Name) [http://chemfinder.cambridgesoft.com]
  # Returns the bold title and the url wrapped by `br`, joined with a single
  # space; nil parts are skipped.
  def webpage
    parts = [b(title), br(url)]
    parts.compact.join(' ')
  end

  # TODO: book

end
36
+
37
+
38
+
data/lib/format/jtp.rb ADDED
@@ -0,0 +1,30 @@
1
+
2
+ # authors(initialed) *title* i(Journal_iso) *year*, i(vol(issue)), pages.
3
# JTP citation style.
#
# Field readers and markup helpers (author_list, periodize, par, b, i, u, ...)
# are not defined here; presumably they come from the Format mixin.
class Format::JTP
  include Format
  Format::Symbol_to_class_string[:jtp] = 'JTP'

  # Journal article.  Returns a four element array: author list, bold title,
  # italicised ISO journal name, and "year, vol(issue), pages" joined with
  # ', '.  The vol(issue) part is omitted when there is no volume.
  def article
    vol_issue = ("#{vol}#{par(issue)}" if vol)
    trailer = [b(year), vol_issue, pages].compact.join(', ')
    [author_list('.'), b(title), i(journal_iso), trailer]
  end

  # Article that has not been submitted yet.  Returns a three element array:
  # author list, italicised ISO journal name and the italicised literal
  # 'Article to be submitted'.
  def article_to_be_submitted
    [author_list('.'), i(journal_iso), i('Article to be submitted')]
  end

  # Webpage: bold title, underlined url and "year, month day", joined with
  # single spaces.  Returns a String.
  def webpage
    heading = periodize(b(title))
    link = u(url)
    date = periodize("#{b(year)}, #{month} #{day}")
    [heading, link, date].compact.join(' ')
  end

  # TODO: book

end
29
+
30
+
data/lib/format/mla.rb ADDED
@@ -0,0 +1,50 @@
1
+
2
+ # authors(initialed) *title* i(Journal_iso) *year*, i(vol(issue)), pages.
3
+ # authors(initialed) \(year\). "title" u(Journal_medline) *vol*(issue): pages.
4
# MLA-like citation style:
#   authors(initialed) \(year\). "title" u(Journal_medline) *vol*(issue): pages.
#
# Field readers and markup helpers (author_list, periodize, par, b, i, u, ...)
# are not defined here; presumably they come from the Format mixin.
class Format::MLA
  include Format
  Format::Symbol_to_class_string[:mla] = 'MLA'

  # Journal article.  Returns a single String ending in '.'.
  # The bold volume plus parenthesised issue is only built when a volume
  # exists; the ": pages" part is always appended.
  def article
    vol_issue = ("#{b(vol)}#{par(issue)}" if vol)
    authors = periodize(author_list('.'))
    when_published = periodize(par(year))
    quoted_title = "\"#{periodize(title)}\""
    journal = u(journal_medline)
    location = "#{vol_issue}: #{pages}"
    [authors, when_published, quoted_title, journal, location].compact.join(' ') << '.'
  end

  # Article that has not been submitted yet.  Returns a single String ending
  # in '.'.
  def article_to_be_submitted
    #[periodize(author_list('.')), periodize(par(year)), "\"#{periodize(title)}\"", i(journal_medline), b(vol), par(issue), ": #{pages}"].compact.join(' ')
    parts = [
      author_list('.'),
      "\"#{periodize(title)}\"",
      u(journal_medline),
      i('Article to be submitted')
    ]
    parts.compact.join(' ') << '.'
  end

  # Webpage: "Title. url (accessed Month day, year)."
  # The "(accessed ...)" part is only emitted when a year is present; month
  # and day are included when available.  Returns a String ending in '.'.
  def webpage
    accessed_string =
      if year
        date_prefix = ''
        if month
          date_prefix = day ? "#{month} #{day}, " : "#{month} "
        end
        "(accessed #{date_prefix}#{year})"
      end

    [periodize(title), url, accessed_string].compact.join(' ') << '.'
  end

  # Workshop contribution: authors, year, quoted title, underlined workshop
  # name and pages.  Returns a single String ending in '.'.
  def workshop
    parts = [
      periodize(author_list('.')),
      periodize(par(year)),
      "\"#{periodize(title)}\"",
      u(name),
      "#{pages}"
    ]
    parts.compact.join(' ') << '.'
  end

  # TODO: book

end
49
+
50
+
data/lib/formatter.rb ADDED
@@ -0,0 +1,276 @@
1
+ require 'citation'
2
+ require 'pubmed'
3
+ require 'set'
4
+ require 'ooffice'
5
+
6
# Rewrites in-text citation markers such as "#[Keller2002,Qian2005]" and
# works out the order in which the cited entries belong in the bibliography.
class Formatter

  FindStartCitation = '#['
  FindEndCitation = ']'
  SplitCitations = ','
  StartNumbering = 1
  ReplaceWith = 'X'

  # Letter for the entry being added, keyed by how many entries already share
  # the same author/year reference.  Kept so existing references to the
  # constant still resolve; suffixes are now computed on the fly by
  # #year_suffix, which has no upper limit.
  ArSizeToLetter = {
    1 => 'b',
    2 => 'c',
    3 => 'd',
    4 => 'e',
    5 => 'f',
    6 => 'g',
    7 => 'h',
    8 => 'i',
    9 => 'j',
    10 => 'k',
  }

  # Replaces the citations in +document+ (only .odt files currently) and
  # writes the result next to it with the extension changed from .odt to
  # .cit.odt.
  #
  # document:: path of the document to read
  # biblio::   yaml bibliography, handed to Bibliography.from_yaml
  # options::  :bib => :mla selects author/year citations (anything else
  #            gives numeric citations); :bracket => true wraps numeric
  #            citations as ' [n]'; :cit_pos => :before / :after moves
  #            citations relative to adjacent '.' or ','.
  #
  # Returns the bibliography after select_by_id! was called with the ids of
  # the cited entries.  Aborts the program if no citation list was produced.
  def create_bibliography(document, biblio, options={})
    bib = Bibliography.from_yaml(biblio)
    new_document = document.sub(/\.odt$/, '.cit.odt')

    ordered_cit_ids = nil
    OpenOffice.new.modify_content(document, new_document) do |xml|
      if (cp = options[:cit_pos])
        xml = reposition_citations(xml, cp)
      end
      replaced =
        if options[:bib] == :mla
          # this may actually change the year on some bibs
          replace_citations_mla(xml, bib)
        elsif options[:bracket]
          replace_citations_numerically(xml, FindStartCitation, FindEndCitation, SplitCitations, StartNumbering, ' [X]')
        else
          replace_citations_numerically(xml)
        end
      ordered_cit_ids = replaced[1]
      replaced[0]
    end
    abort "Couldn't get citations" unless ordered_cit_ids
    bib.select_by_id!(ordered_cit_ids)
    bib
  end

  # Moves every citation that touches a '.' or ',' to the requested side of
  # that punctuation mark.
  #
  # position:: :before or :after (the citation ends up before / after the
  #            punctuation); the equivalent strings are accepted too.
  #
  #   :after   search<text:span text:style-name="T29">#[Keller2002]</text:span>,
  #        --> search,<text:span text:style-name="T29">#[Keller2002]</text:span>
  #            human samples#[Qian2005].
  #        --> human samples.#[Qian2005]
  #
  # Returns the modified string.  Raises ArgumentError for any other position.
  def reposition_citations(xml, position)
    # The negated character classes keep a match inside ONE citation and ONE
    # tag.  A lazy '.*?' followed by a required character can stretch past
    # the first ']' or '>' and drag unrelated text along.
    case position.to_s
    when 'after'
      xml = xml.gsub(/(#\[[^\]]*\])([.,])/, '\2\1')
      xml.gsub(/(<[^\/<>][^<>]*>)(#\[[^\]]*\])(<\/[^<>]*>)([.,])/, '\4\1\2\3')
    when 'before'
      xml = xml.gsub(/([.,])(#\[[^\]]*\])/, '\2\1')
      xml.gsub(/([.,])(<[^\/<>][^<>]*>)(#\[[^\]]*\])(<\/[^<>]*>)/, '\2\3\4\1')
    else
      raise ArgumentError, "position must be :before or :after (got #{position.inspect})"
    end
  end

  # Builds the in-text author/year reference for a citation, e.g.
  # "Smith, 2004", "Smith and Jones, 2004" or "Smith et al., 2004".
  # Webpages are referenced by their title instead.
  #
  # citation.authors may be a String (its first whitespace separated word is
  # used) or an array of authors, each of which must respond to #last
  # (presumably [initials, last_name] arrays -- confirm against Citation).
  def to_mla_ref(citation)
    return citation.title if citation.bibtype == :webpage

    authors = citation.authors
    author_bit =
      if authors.is_a? String # just use first listed thing for now
        authors.split(/\s+/).first
      else # assuming array
        case authors.size
        when 1
          authors[0].last
        when 2
          authors[0].last + ' and ' + authors[1].last
        else
          authors[0].last + ' et al.'
        end
      end
    author_bit + ', ' + "#{citation.year}"
  end

  # Replaces every citation marker in +string+ with an author/year citation
  # such as " (Smith, 2004a; Jones et al., 2001)".
  #
  # order:: :alphabetical (default) sorts the returned ids by their
  #         case-insensitive author/year reference; :as_is returns them in
  #         order of first appearance in +string+.
  #
  # Entries that would produce the same reference are told apart with a
  # letter after the year (2004a, 2004b, ...).
  # warning: that letter is appended IN PLACE to the year of the affected
  # citations in +bib+, so their year must be a String.
  #
  # Aborts the program if a marker names an id that is not in +bib+.
  # Returns [new_string, ordered_citation_ids].
  def replace_citations_mla(string, bib, start_citation='#[', end_citation=']', split_citations=',', order=:alphabetical)
    citation_regex = /#{Regexp.escape(start_citation)}(.*?)#{Regexp.escape(end_citation)}/
    id_to_cit = {}
    bib.citations.each do |cit|
      id_to_cit[cit.ident] = cit
    end

    # base reference (before any letter is added) => citations sharing it
    mla_ref_to_cits = Hash.new {|h,k| h[k] = [] }
    id_to_mla_ref = {} # from the citation id to the final mla_ref
    ids_in_order = []  # citation ids in order of first appearance

    # First pass: assign a reference to every cited entry, adding a/b/c...
    # where author/year combos are duplicated.
    string.scan(citation_regex) do
      split_refs(Regexp.last_match(1), split_citations).each do |ref|
        unless id_to_cit.key?(ref)
          abort ["*************************************",
                 "* No citation for: #{ref}",
                 "*************************************"].join("\n")
        end
        # Must be checked before computing the reference: once a letter has
        # been appended to the year, to_mla_ref no longer returns the base
        # reference and the entry would be registered a second time.
        next if id_to_mla_ref.key?(ref)

        citation = id_to_cit[ref]
        mla_ref = to_mla_ref(citation)
        same_ref = mla_ref_to_cits[mla_ref]
        if same_ref.empty? # first mla_ref of its kind
          id_to_mla_ref[ref] = mla_ref
        else # conflict
          if same_ref.size == 1
            # the first entry was unlettered so far: it becomes 'a'
            first = same_ref[0]
            id_to_mla_ref[first.ident] = mla_ref + 'a'
            first.year << 'a'
          end
          letter = year_suffix(same_ref.size)
          id_to_mla_ref[ref] = mla_ref + letter
          # change the year in bib
          citation.year << letter
        end
        same_ref.push(citation)
        ids_in_order.push(ref)
      end
    end

    # second pass to create actual citation with updated mla_refs
    new_string = string.gsub(citation_regex) do
      refs = split_refs(Regexp.last_match(1), split_citations)
      to_mla_citation_string(refs.map {|ref| id_to_mla_ref[ref] })
    end

    return [new_string, ids_in_order] if order == :as_is

    # Alphabetical: order the base references; entries sharing one are
    # already in a, b, c... order.
    ordered_cit_ids = []
    mla_ref_to_cits.keys.sort_by {|k| k.downcase }.each do |mla_ref|
      ordered_cit_ids.push( *(mla_ref_to_cits[mla_ref].map {|cit| cit.ident }) )
    end
    [new_string, ordered_cit_ids]
  end

  # takes an array of mla_refs and spits out the final in-text citation
  def to_mla_citation_string(mla_refs)
    " (#{mla_refs.join('; ')})"
  end

  # Replaces every citation marker in +string+ with the number(s) of the
  # cited entries.  Entries are numbered from +start_numbering+ in order of
  # first appearance.
  #
  # replace_with:: template for the replacement.  The first 'X' stands for
  #                the numeric citation, so 'X' gives just the number and
  #                '[X]' a bracketed number.
  #
  # Ids inside a marker are separated by +split_citations+; surrounding
  # whitespace is ignored and an id repeated inside one marker is listed once.
  #
  # Returns [new_string, ids] where ids are the citation ids ordered by the
  # number they were given.
  def replace_citations_numerically(string, find_start_citation=FindStartCitation, find_end_citation=FindEndCitation, split_citations=SplitCitations, start_numbering=StartNumbering, replace_with=ReplaceWith)
    before_x, _x, after_x = replace_with.partition('X')
    citation_regex = /#{Regexp.escape(find_start_citation)}(.*?)#{Regexp.escape(find_end_citation)}/

    cits = {} # citation id => number
    next_number = start_numbering
    new_string = string.gsub(citation_regex) do
      numbers = split_refs(Regexp.last_match(1), split_citations).map do |ref|
        unless cits.key?(ref)
          cits[ref] = next_number
          next_number += 1
        end
        cits[ref] # <- no formatting at this point
      end
      before_x + cit_string(numbers) + after_x
    end
    ordered_cits = cits.sort_by {|_ref, number| number }.map {|ref, _number| ref }
    [new_string, ordered_cits]
  end

  # given an array of citation numbers, generate the string for their citation
  #
  # e.g. if [10, 11, 12], string should be '10-12'
  # e.g. if [4, 13, 12, 17] string should be '4,12-13,17'
  #
  # Duplicates are listed once and the given array is left untouched.
  # Returns '' for an empty array.
  def cit_string(cit_num_array)
    runs = [] # [first, last] of every run of consecutive numbers
    cit_num_array.uniq.sort.each do |num|
      if runs.last && runs.last[1] + 1 == num
        runs.last[1] = num # extend the current run
      else
        runs << [num, num]
      end
    end
    runs.map {|first, last| first == last ? first.to_s : "#{first}-#{last}" }.join(',')
  end

  private

  # Splits the inside of a citation marker into its citation ids, ignoring
  # whitespace around each id.
  def split_refs(list, split_citations)
    list.split(split_citations).map {|ref| ref.strip }
  end

  # Letter for the entry at +index+ among entries sharing one reference:
  # 0 => 'a', 1 => 'b', ... 25 => 'z', 26 => 'aa' (String#succ).
  def year_suffix(index)
    letter = 'a'
    index.times { letter = letter.succ }
    letter
  end

end