bivy 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. data/.gitignore +21 -0
  2. data/History +16 -0
  3. data/LICENSE +29 -0
  4. data/README.rdoc +37 -0
  5. data/Rakefile +43 -0
  6. data/TODO +12 -0
  7. data/VERSION +1 -0
  8. data/bin/bivy.rb +56 -0
  9. data/bin/pubmed_to_bivy.rb +78 -0
  10. data/doc/config.yaml +13 -0
  11. data/doc/src/default.css +126 -0
  12. data/doc/src/default.template +38 -0
  13. data/doc/src/tutorial/basic_flow.page +71 -0
  14. data/doc/src/tutorial/index.page +8 -0
  15. data/doc/src/tutorial/new_formats_and_media.page +83 -0
  16. data/jrn_abbrev/abbr_html.tgz +0 -0
  17. data/jrn_abbrev/abbr_to_journal.yaml +676 -0
  18. data/jrn_abbrev/download_abbrevs.rb +20 -0
  19. data/jrn_abbrev/for_ruby_class.rb +686 -0
  20. data/jrn_abbrev/html_to_yaml.rb +50 -0
  21. data/lib/bibliography.rb +144 -0
  22. data/lib/bivy.rb +4 -0
  23. data/lib/citation.rb +194 -0
  24. data/lib/format.rb +120 -0
  25. data/lib/format/acs.rb +88 -0
  26. data/lib/format/bioinformatics.rb +33 -0
  27. data/lib/format/bmc.rb +38 -0
  28. data/lib/format/jtp.rb +30 -0
  29. data/lib/format/mla.rb +50 -0
  30. data/lib/formatter.rb +276 -0
  31. data/lib/journal.rb +6 -0
  32. data/lib/journal/iso_to_full.yaml +1320 -0
  33. data/lib/journal/medline_to_full.yaml +7 -0
  34. data/lib/journal/medline_to_iso.yaml +45 -0
  35. data/lib/media.rb +88 -0
  36. data/lib/media/html.rb +65 -0
  37. data/lib/ooffice.rb +39 -0
  38. data/lib/pubmed.rb +209 -0
  39. data/lib/rtf.rb +217 -0
  40. data/old_stuff/old_list2refs.rb +103 -0
  41. data/old_stuff/pubmed2html.rb +119 -0
  42. data/old_stuff/pubmed_bib_write.rb +92 -0
  43. data/old_stuff/xml.tmp.xml +115 -0
  44. data/scripts/merge_bibs.rb +70 -0
  45. data/spec/bibliography_spec.rb +127 -0
  46. data/spec/citation_positions.odt +0 -0
  47. data/spec/formatter_spec.rb +14 -0
  48. data/spec/formatter_spec/cits_after.xml +2 -0
  49. data/spec/formatter_spec/cits_before.xml +2 -0
  50. data/spec/formatter_spec/content.xml +2 -0
  51. data/spec/ooffice_spec.rb +27 -0
  52. data/spec/pubmed_spec.rb +26 -0
  53. data/spec/spec_helper.rb +7 -0
  54. data/spec/testfiles/doc1.odt +0 -0
  55. metadata +136 -0
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+
4
# Renders +hash+ as Ruby source text that assigns a hash literal to the
# constant Medline_to_Full, one "'key' => 'value'," entry per line, with
# entries sorted by key.
#
# Keys and values are emitted as single-quoted string literals. Backslashes
# and single quotes inside them are escaped so that a name such as
# "O'Neil" still produces loadable Ruby; text without those characters is
# emitted exactly as before.
#
# hash:: mapping whose keys and values respond to to_s
#
# Returns the generated source as a single String (no trailing newline).
def to_hash_string(hash)
  # Inside a single-quoted Ruby literal only \ and ' need escaping.
  quote = lambda do |text|
    "'" + text.to_s.gsub(/[\\']/) { |ch| "\\" + ch } + "'"
  end

  lines = ["Medline_to_Full = {"]
  hash.sort.each do |k, v|
    lines << "#{quote.call(k)} => #{quote.call(v)},"
  end
  lines << "}"
  lines.join("\n")
end
14
+
15
+
16
# Script body: scans every *.html file in the current directory for two-cell
# table rows (first cell = key, second cell = value), collects them into one
# hash, and writes that hash as Ruby source to for_ruby_class.rb.

a_to_j = {} ## should be uniq mapping here (later rows overwrite earlier ones)

# Cleans one captured table cell: drops <i> and <A ...> tags, trims
# whitespace, and removes a trailing " (...)" group.
# The last sub is because of some bad html they have in their journal
# ...<TR><TD>Annu Rev Plant Physiol Plant Mol Biol
clean_cell = lambda do |cell|
  cell.gsub(/<\/?i>/, '').gsub(/<\/?A.*?>/, '').strip.sub(/ \(.*\)$/, '').sub(/<TR><TD>.*$/, '')
end

# The pages use two row layouts: cells on a single line, and cells on
# separate lines. Both are scanned, in this order.
row_patterns = [
  /<TR><TD>(.*?)<\/TD><TD>(.*?)<\/TD>/m,
  /<TR>\n<TD>(.*?)<\/TD>\n<TD>(.*?)<\/TD>/
]

Dir["*.html"].each do |file|
  puts "FILE: #{file}"
  html = IO.read(file) # read once and reuse for both patterns
  num_matches = 0

  row_patterns.each do |pattern|
    html.scan(pattern) do |match|
      key, value = match.map(&clean_cell)
      num_matches += 1
      #if a_to_j.key? key ; puts "ALREADY HAVE KEY! " end
      a_to_j[key] = value
    end
  end

  puts "#{num_matches} MATCHES"
end

File.open("for_ruby_class.rb", 'w') do |fh|
  fh.print( to_hash_string(a_to_j))
end
@@ -0,0 +1,144 @@
1
+ require 'citation'
2
+ require 'pubmed'
3
+
4
# An ordered list of citation objects keyed by each citation's +ident+.
# Can be built from, and dumped to, a YAML hash of the form
# { ident => { attribute => value, ... } }.
#
# Citation objects are expected to respond to +ident+ and +to_hash+; objects
# that also respond to +pmid+ are additionally compared by that value in
# #not_uniq.
class Bibliography

  attr_accessor :citations

  # citations:: optional Array of citation objects. Defaults to an empty
  #             list so that #add, #to_hash etc. work on a new bibliography.
  def initialize(citations=nil)
    @citations = citations || []
  end

  # Returns an array of the citations from +other+ that are not unique
  # compared to self: those whose +ident+ already occurs in self, plus those
  # whose (non-nil) +pmid+ already occurs in self. Each citation object is
  # listed once.
  def not_uniq(other)
    scit = citations
    ocit = other.citations
    by_ident = not_uniq_by(scit, ocit, :ident)
    # only citations that actually carry a pmid can clash on it
    s_pm, o_pm = [scit, ocit].map do |ar|
      ar.select {|v| v.respond_to?(:pmid) && !v.pmid.nil? }
    end
    by_pmid = not_uniq_by(s_pm, o_pm, :pmid)
    (by_ident + by_pmid).uniq
  end

  # Returns the members of +cits2+ whose value for attribute +att+ (a Symbol
  # naming a reader method) also occurs among +cits1+. Results are grouped by
  # attribute value, in order of first appearance in +cits2+.
  def not_uniq_by(cits1, cits2, att)
    in_first = cits1.group_by(&att)
    not_un = []
    cits2.group_by(&att).each do |key, group|
      not_un.push( *group ) if in_first.key?(key)
    end
    not_un
  end

  # adds a list of citations.  It will ONLY add citations whose identifiers
  # do not already exist (either in the bibliography or earlier in the same
  # call).  Citations which already have a duplicate identifier will be
  # returned.  nil is returned if no citation objects have clashing id's
  def add(*citations)
    known = {}
    @citations.each {|cit| known[cit.ident] = true }
    clashing = []
    citations.each do |cit|
      if known.key? cit.ident
        clashing << cit
      else
        @citations.push(cit)
        known[cit.ident] = true
      end
    end
    clashing.empty? ? nil : clashing
  end

  # Builds a Bibliography from YAML. If +file_or_string+ names an existing
  # file it is loaded, otherwise the argument itself is parsed as YAML.
  # Each entry becomes a PubMed object when it has a 'pmid' key, otherwise
  # an instance of the Citation subclass named by its capitalized 'bibtype'.
  #
  # NOTE(review): YAML.load is applied to the input as-is; if bibliographies
  # can come from untrusted sources this should be reviewed.
  def self.from_yaml(file_or_string)
    hash =
      if File.exist? file_or_string
        YAML.load_file(file_or_string)
      else
        YAML.load(file_or_string)
      end
    # we were given a nonexistent file and the yaml is not a hash
    # in this case we need to create an empty bib object
    hash = {} unless hash.is_a? Hash
    citations = hash.map do |id,vals|
      vals['ident'] = id
      bibtype = vals['bibtype']
      klass =
        if vals.key? 'pmid'
          PubMed
        else
          Citation.const_get(bibtype.capitalize)
        end
      vals['bibtype'] = bibtype.to_sym
      klass.new(vals)
    end
    new(citations)
  end

  # selects as internal citations only those matching the array of idents
  # (in the order given); aborts if an ident is unknown.
  # returns the citations
  def select_by_id!(ids)
    by_ident = {}
    # when several citations share an ident the last one wins
    @citations.each {|cit| by_ident[cit.ident] = cit }
    @citations = ids.map do |id|
      unless by_ident.key? id ; abort "Cannot find '#{id}' in citations!" end
      by_ident[id]
    end
  end

  # hashes by ident: { ident => citation.to_hash minus its 'ident' entry }
  def to_hash
    hsh = {}
    @citations.each do |cit|
      cthash = cit.to_hash
      cthash.delete('ident')
      hsh[cit.ident] = cthash
    end
    hsh
  end

  # if given a file, writes to the file; the YAML string is returned in
  # either case
  def to_yaml(file=nil)
    string = to_hash.to_yaml
    if file
      File.open(file, 'w') {|fh| fh.print string }
    end
    string
  end

  # a format_obj can respond to the call obj.format(citations)
  # and :header and :footer; returns header + formatted citations + footer
  def write(format_obj)
    format_obj.header + format_obj.format(@citations) + format_obj.footer
  end

end
data/lib/bivy.rb ADDED
@@ -0,0 +1,4 @@
1
+
2
# Top-level namespace for the library; currently only carries the version.
module Bivy
  # Library version string (frozen so it cannot be mutated in place).
  # NOTE(review): the package listing shows release 0.0.5 while this constant
  # says 0.0.3 — confirm whether it should track the released version.
  VERSION = '0.0.3'.freeze
end
data/lib/citation.rb ADDED
@@ -0,0 +1,194 @@
1
+
2
+ require 'journal'
3
+
4
# Base class for bibliography entries. Holds the attributes shared by every
# entry type and converts to and from a plain hash of attribute values.
class Citation
  # quotes are direct or parenthetical
  attr_accessor :bibtype, :ident, :quotes, :abstract
  # authors should be an array of Author objects, or a string for an exact
  # line
  attr_reader :authors

  # hash:: optional mapping of attribute name => value; each pair is applied
  #        through the matching "name=" writer, so every key must correspond
  #        to a writer defined on the (sub)class.
  #
  # NOTE(review): keys are dispatched with send, so a hash from an untrusted
  # source could invoke any method ending in "=" — verify the callers.
  def initialize(hash=nil)
    @authors = nil
    @quotes = []
    # Citation::Article -> :article
    @bibtype = self.class.to_s.split('::')[-1].downcase.to_sym
    if hash
      hash.each do |x,v|
        send("#{x}=".to_sym, v)
      end
    end
  end

  # Returns a hash of every assigned instance variable, keyed by its name
  # without the leading '@'. 'bibtype' is converted to a String. 'authors'
  # becomes an array of strings when authors is an array; a String (exact
  # author line) or nil is passed through unchanged.
  def to_hash
    hash = {}
    instance_variables.each do |var|
      # to_s: instance_variables yields Strings on ruby 1.8, Symbols later
      hash[var.to_s[1..-1]] = instance_variable_get(var)
    end
    hash['bibtype'] = hash['bibtype'].to_s
    hash['authors'] =
      if @authors.is_a? Array
        @authors.map {|v| v.to_s }
      else
        @authors
      end
    hash
  end

  # given an array of strings or objects, ensures objects, given string it
  # will set as a string. Aborts on an array element that is neither a
  # String nor a Citation::Author.
  def authors=(array)
    if array.is_a? Array
      @authors = array.map do |auth|
        if auth.is_a? String
          Citation::Author.from_s(auth)
        elsif auth.is_a? Citation::Author
          auth
        else
          abort "Don't recognize: #{auth.class} for #{auth}"
        end
      end
    else
      # this is a string
      @authors = array
    end
  end

  # make the yaml look like a hash
  def to_yaml
    to_hash.to_yaml
  end

end
74
+
75
# Mixin for citation types that are published in a journal. Keeps three
# spellings of the journal name: the medline one, the full one and the ISO
# one.
module JournalLike
  attr_accessor :journal_medline, :journal_full, :journal_iso

  # Records +jrnl+ as the medline-format journal name and fills in whichever
  # of @journal_full / @journal_iso is still nil. Each is looked up in
  # Journal::Medline_to_Full / Journal::Medline_to_ISO respectively; when the
  # table has no entry for +jrnl+, the medline name itself is used.
  # Attributes that already hold a value are left alone.
  def set_journal_from_medline(jrnl)
    @journal_medline = jrnl

    if @journal_full.nil?
      to_full = Journal::Medline_to_Full
      @journal_full = to_full.key?(jrnl) ? to_full[jrnl] : jrnl
    end

    if @journal_iso.nil?
      to_iso = Journal::Medline_to_ISO
      @journal_iso = to_iso.key?(jrnl) ? to_iso[jrnl] : jrnl
    end
  end

  # true once a medline journal name has been set
  def has_journal?
    !journal_medline.nil?
  end

end
107
+
108
# A journal article citation.
class Citation::Article < Citation
  include JournalLike
  # ident = unique identifier for placing in papers
  attr_accessor :title, :year, :month, :vol, :issue, :pages

  # Equality between articles.
  #
  # When self responds to +pmid+ (Article itself defines no such method, so
  # presumably a subclass does), two objects are equal exactly when +other+
  # also responds to +pmid+ and the values match. Otherwise every one of
  # title, year, month, vol, issue, pages, journal_medline and bibtype must
  # match. An +other+ that lacks one of those readers (nil, a different kind
  # of citation, ...) is simply not equal instead of raising NoMethodError.
  def ==(other)
    if self.respond_to? :pmid
      return other.respond_to?(:pmid) && (self.pmid == other.pmid)
    end
    %w(title year month vol issue pages journal_medline bibtype).all? do |v|
      other.respond_to?(v) && self.send(v.to_sym) == other.send(v.to_sym)
    end
  end

  # Returns the page range with an abbreviated end page expanded, e.g.
  # '123-29' -> '123-129'. Ranges containing a '.' and ranges whose end is
  # not numerically smaller than the start are returned untouched, as is a
  # pages value that is not a String (unset, or a bare number).
  def pages_full
    return @pages unless @pages.is_a? String
    st_p, end_p = @pages.split('-')
    if !@pages.include?('.') && end_p && end_p.to_i < st_p.to_i # 123-29
      # borrow the leading digits the end page is missing from the start page
      diff = st_p.size - end_p.size
      new_end_p = st_p[0,diff] + end_p
      [st_p, new_end_p].join('-')
    else
      @pages
    end
  end
end
141
+
142
+
143
# Journal-style citation that carries only a title (plus the journal names
# supplied by JournalLike).
class Citation::Article_to_be_submitted < Citation
  include JournalLike

  attr_accessor(:title)
end
147
+
148
# Citation type with title, name, year and pages attributes.
class Citation::Workshop < Citation
  attr_accessor :title,
                :name,
                :year,
                :pages
end
151
+
152
# Citation type with title, publisher and year attributes.
class Citation::Book < Citation
  attr_accessor :title,
                :publisher,
                :year
end
155
+
156
# Citation for a web page.
class Citation::Webpage < Citation
  # title and address of the page
  attr_accessor :title, :url
  # month, year, day are all for the creation of the media itself
  attr_accessor :year, :month, :day
  # date last accessed (String: 'yyyy-mm-dd')
  attr_accessor :accessed
end
162
+
163
+
164
# One author of a citation: a last name plus initials.
class Citation::Author

  ## INITIALS should be with NO spaces, all caps
  attr_reader :last, :initials

  # last::     last name
  # initials:: initials (see note above on the expected form)
  def initialize(last, initials)
    @last = last
    @initials = initials
  end

  def inspect
    "<#{@last}, #{initials}>"
  end

  # "Last, INITIALS" — the form that ::from_s parses back
  def to_s
    "#{@last}, #{@initials}"
  end

  # Parses "Last, INITIALS": everything before the first ', ' is the last
  # name, the remainder (re-joined with ', ') becomes the initials.
  # TODO: make this smarter for initials
  def self.from_s(string)
    last, *rest = string.split(', ')
    self.new(last, rest.join(', '))
  end

  # Two authors are equal when both last name and initials match. Objects
  # that do not expose both readers (nil, strings, ...) are never equal,
  # rather than raising NoMethodError.
  def ==(other)
    other.respond_to?(:last) && other.respond_to?(:initials) &&
      [self.last, self.initials] == [other.last, other.initials]
  end

end
193
+
194
+
data/lib/format.rb ADDED
@@ -0,0 +1,120 @@
1
+
2
# Shared behaviour for citation format styles, plus a factory (Format.new)
# that loads a style by name.
#
# An object using these methods holds a media object (@media_obj) that
# produces the actual markup and, while formatting, the citation currently
# being rendered (@cit). Calls to undefined methods are forwarded: names
# listed in MediaForwarding go to the media object, anything the current
# citation responds to goes to the citation.
module Format
  # Maps a format symbol to its class name for the cases where simply
  # capitalizing the symbol is not right. Empty here.
  Symbol_to_class_string = { }

  # Method names that are forwarded to the media object.
  # NOTE: :par is listed here but #par is also defined below, so the local
  # definition is the one that runs.
  MediaForwarding = {
    :i => true,
    :b => true,
    :u => true,
    :header => true,
    :footer => true,
    :periodize => true,
    :par => true,
    :br => true,
  }

  # Factory: requires "format/<tp>" (deliberately lazy, so only the requested
  # style is loaded), looks up the class of that name inside Format and
  # returns a new instance wrapping +media_obj+.
  #
  # media_obj:: object that renders markup (must answer the MediaForwarding
  #             calls that the style uses, and #list)
  # tp::        Symbol naming the style (default :jtp)
  def self.new(media_obj, tp=:jtp)
    require "format/#{tp}"
    klass_st = Symbol_to_class_string[tp] || tp.to_s.capitalize
    Format.const_get(klass_st).new(media_obj)
  end

  # Forwards unknown calls to the media object or to the current citation
  # (see module comment); raises NoMethodError when neither applies.
  def method_missing(meth, *args, &block)
    if MediaForwarding.key?(meth)
      @media_obj.send(meth, *args, &block)
    elsif @cit && @cit.respond_to?(meth)
      @cit.send(meth, *args, &block)
    else
      raise NoMethodError, "method '#{meth}' called with args (#{args.join(',')})"
    end
  end

  # Keeps respond_to? consistent with what method_missing will accept.
  def respond_to_missing?(meth, include_private=false)
    MediaForwarding.key?(meth) || (!@cit.nil? && @cit.respond_to?(meth)) || super
  end

  def initialize(media_obj)
    @media_obj = media_obj
    @cit = nil
  end

  # "JT" -> "J.T." : appends +punc+ after every character of +initials+
  def punctuate_initials(initials, punc='.')
    initials.split('').map { |i| i + punc }.join('')
  end

  # Formats every citation in +cits+ by calling the method named after the
  # citation's bibtype (e.g. #article), finishing the result, and handing
  # the list of strings to the media object's #list.
  def format(cits)
    as_strings = cits.map do |cit|
      @cit = cit
      finish(send(@cit.bibtype))
    end
    @media_obj.list(as_strings)
  end

  # if given an array, will finish it with compaction and periodizing
  # (periodize is supplied by the media object) and join the pieces with a
  # space; otherwise, won't touch it
  def finish(arg)
    if arg.is_a? Array
      periodize(arg.compact).join(' ')
    else
      arg
    end
  end

  # Builds the author string for the current citation. If the citation's
  # authors is a String it is returned as is.
  #
  # probably only the first argument would you ever change
  #
  # after_initials::             punctuation placed after each initial
  # separate_last_and_initials:: text between last name and initials
  # delim::                      separator between authors; if nil, then
  #                              et al. format is used (1 author, fine,
  #                              2 authors connect with and_word, 3 or more
  #                              = first author + italic 'et al.')
  # and_word::                   word used to connect authors
  # join_with_ands::             when true (and delim is given) the last
  #                              author is attached with and_word instead
  #                              of delim
  #
  # An empty author list yields '', and a single author is never prefixed
  # with and_word.
  def author_list(after_initials='.', separate_last_and_initials=' ', delim=", ", and_word="and", join_with_ands=false)
    return authors if authors.is_a? String

    names = authors.map do |auth|
      auth.last + separate_last_and_initials + punctuate_initials(auth.initials, after_initials)
    end

    if delim.nil?
      case names.size
      when 0
        ''
      when 1
        names.first
      when 2
        names.join(" #{and_word} ")
      else
        names.first + ' ' + i('et al.')
      end
    elsif join_with_ands && names.size > 1
      names[0...-1].join(delim) + " #{and_word} " + names[-1]
    else
      names.join(delim)
    end
  end

  #############################
  # universal format methods
  #############################

  # parenthesizes any 'true' object that has to_s method, otherwise ''
  def par(st)
    if st
      "(#{st})"
    else
      ''
    end
  end

end
119
+
120
+