bivy 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (55) hide show
  1. data/.gitignore +21 -0
  2. data/History +16 -0
  3. data/LICENSE +29 -0
  4. data/README.rdoc +37 -0
  5. data/Rakefile +43 -0
  6. data/TODO +12 -0
  7. data/VERSION +1 -0
  8. data/bin/bivy.rb +56 -0
  9. data/bin/pubmed_to_bivy.rb +78 -0
  10. data/doc/config.yaml +13 -0
  11. data/doc/src/default.css +126 -0
  12. data/doc/src/default.template +38 -0
  13. data/doc/src/tutorial/basic_flow.page +71 -0
  14. data/doc/src/tutorial/index.page +8 -0
  15. data/doc/src/tutorial/new_formats_and_media.page +83 -0
  16. data/jrn_abbrev/abbr_html.tgz +0 -0
  17. data/jrn_abbrev/abbr_to_journal.yaml +676 -0
  18. data/jrn_abbrev/download_abbrevs.rb +20 -0
  19. data/jrn_abbrev/for_ruby_class.rb +686 -0
  20. data/jrn_abbrev/html_to_yaml.rb +50 -0
  21. data/lib/bibliography.rb +144 -0
  22. data/lib/bivy.rb +4 -0
  23. data/lib/citation.rb +194 -0
  24. data/lib/format.rb +120 -0
  25. data/lib/format/acs.rb +88 -0
  26. data/lib/format/bioinformatics.rb +33 -0
  27. data/lib/format/bmc.rb +38 -0
  28. data/lib/format/jtp.rb +30 -0
  29. data/lib/format/mla.rb +50 -0
  30. data/lib/formatter.rb +276 -0
  31. data/lib/journal.rb +6 -0
  32. data/lib/journal/iso_to_full.yaml +1320 -0
  33. data/lib/journal/medline_to_full.yaml +7 -0
  34. data/lib/journal/medline_to_iso.yaml +45 -0
  35. data/lib/media.rb +88 -0
  36. data/lib/media/html.rb +65 -0
  37. data/lib/ooffice.rb +39 -0
  38. data/lib/pubmed.rb +209 -0
  39. data/lib/rtf.rb +217 -0
  40. data/old_stuff/old_list2refs.rb +103 -0
  41. data/old_stuff/pubmed2html.rb +119 -0
  42. data/old_stuff/pubmed_bib_write.rb +92 -0
  43. data/old_stuff/xml.tmp.xml +115 -0
  44. data/scripts/merge_bibs.rb +70 -0
  45. data/spec/bibliography_spec.rb +127 -0
  46. data/spec/citation_positions.odt +0 -0
  47. data/spec/formatter_spec.rb +14 -0
  48. data/spec/formatter_spec/cits_after.xml +2 -0
  49. data/spec/formatter_spec/cits_before.xml +2 -0
  50. data/spec/formatter_spec/content.xml +2 -0
  51. data/spec/ooffice_spec.rb +27 -0
  52. data/spec/pubmed_spec.rb +26 -0
  53. data/spec/spec_helper.rb +7 -0
  54. data/spec/testfiles/doc1.odt +0 -0
  55. metadata +136 -0
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+
4
# Renders +hash+ as Ruby source text that assigns a Hash literal to the
# constant Medline_to_Full, one "'key' => 'value'," entry per line, with the
# entries in sorted order.
#
# Keys and values are written inside single-quoted literals, so every
# backslash and single quote they contain is backslash-escaped. Without that,
# a name containing an apostrophe would produce a file Ruby cannot parse.
#
# hash:: the mapping to dump; keys and values are converted with to_s
#
# Returns the generated source as one String (no trailing newline).
def to_hash_string(hash)
  quote = lambda { |text| "'" + text.to_s.gsub(/[\\']/) { |ch| "\\#{ch}" } + "'" }
  lines = ["Medline_to_Full = {"]
  hash.sort.each do |key, value|
    lines << "#{quote.call(key)} => #{quote.call(value)},"
  end
  lines << "}"
  lines.join("\n")
end
14
+
15
+
16
# The two table-row layouts the script looks for: both cells on one line
# (matched across newlines inside a cell), and one cell per line.
ROW_PATTERNS = [
  /<TR><TD>(.*?)<\/TD><TD>(.*?)<\/TD>/m,
  /<TR>\n<TD>(.*?)<\/TD>\n<TD>(.*?)<\/TD>/
].freeze

# Strips markup from one captured table cell: removes <i> and <A ...> tags,
# trims whitespace and drops a trailing " (...)" note.
#
# The last sub is because of some bad html they have in their journal
# ...<TR><TD>Annu Rev Plant Physiol Plant Mol Biol
def clean_cell(cell)
  cell.gsub(/<\/?i>/, '').gsub(/<\/?A.*?>/, '').strip.sub(/ \(.*\)$/, '').sub(/<TR><TD>.*$/, '')
end

# first cell => second cell of every matched row; should be a unique mapping
# (presumably journal abbreviation => journal name -- confirm against the
# downloaded pages)
a_to_j = {}

Dir["*.html"].each do |file|
  puts "FILE: #{file}"
  html = IO.read(file) # read once and reuse for every pattern
  num_matches = 0
  ROW_PATTERNS.each do |pattern|
    html.scan(pattern) do |cells|
      first_cell, second_cell = cells.map { |cell| clean_cell(cell) }
      # a later row with the same first cell silently replaces the earlier one
      a_to_j[first_cell] = second_cell
      num_matches += 1
    end
  end
  puts "#{num_matches} MATHCEES"
end

# Write the collected mapping out as Ruby source.
File.open("for_ruby_class.rb", 'w') do |fh|
  fh.print(to_hash_string(a_to_j))
end
@@ -0,0 +1,144 @@
1
+ require 'citation'
2
+ require 'pubmed'
3
+
4
# A list of citation objects that can be merged with other lists, filtered by
# identifier, serialised to and from YAML, and handed to a format object for
# output.
#
# Citations are duck-typed: the methods here call #ident and #to_hash on
# them, and #pmid on those that respond to it.
class Bibliography

  # the Array of citation objects, in order
  attr_accessor :citations

  # citations:: optional Array of citation objects. Defaults to an empty
  #             list so that #add, #to_hash etc. work on a fresh object.
  def initialize(citations=nil)
    @citations = citations || []
  end

  # Returns an array of the citations from +other+ that are not unique
  # compared to self: those whose ident already occurs in self, plus those
  # whose (non-nil) pmid already occurs in self. Each citation object is
  # listed once.
  def not_uniq(other)
    scit = citations
    ocit = other.citations
    by_ident = not_uniq_by(scit, ocit, :ident)
    # only citations that actually carry a pmid can clash on it
    mine, theirs = [scit, ocit].map do |cits|
      cits.select { |cit| cit.respond_to?(:pmid) && cit.pmid }
    end
    by_pmid = not_uniq_by(mine, theirs, :pmid)
    (by_ident + by_pmid).uniq
  end

  # Returns the members of +cits2+ whose value for attribute +att+ (a method
  # name given as a Symbol) also occurs among +cits1+.
  def not_uniq_by(cits1, cits2, att)
    self_by_att = cits1.group_by(&att)
    not_un = []
    cits2.group_by(&att).each do |value, cits|
      not_un.push(*cits) if self_by_att.key?(value)
    end
    not_un
  end

  # Adds a list of citations. It will ONLY add citations whose identifiers
  # do not already exist, including identifiers added earlier in the same
  # call. Citations which have a duplicate identifier are returned in an
  # Array; nil is returned if no citation objects have clashing ids.
  def add(*citations)
    known = {}
    @citations.each { |cit| known[cit.ident] = true }
    clashing = []
    citations.each do |cit|
      if known.key?(cit.ident)
        clashing << cit
      else
        known[cit.ident] = true
        @citations.push(cit)
      end
    end
    clashing.empty? ? nil : clashing
  end

  # Builds a Bibliography from YAML. If +file_or_string+ names an existing
  # file that file is loaded, otherwise the argument itself is parsed as
  # YAML. The document is expected to be a hash of ident => attribute hash;
  # anything else yields an empty bibliography.
  #
  # Entries with a 'pmid' key become PubMed objects; all others become
  # Citation::<Bibtype> (the capitalized 'bibtype' value).
  #
  # Raises ArgumentError if an entry has no 'bibtype'.
  #
  # NOTE(review): YAML.load is used here; on older Ruby/Psych versions it can
  # instantiate arbitrary objects, so do not feed it untrusted input.
  def self.from_yaml(file_or_string)
    hash =
      if File.exist? file_or_string
        YAML.load_file(file_or_string)
      else
        YAML.load(file_or_string)
      end
    # we were given a nonexistent file and the yaml is not a hash
    # in this case we need to create an empty bib object
    hash = {} unless hash.is_a? Hash
    citations = hash.map do |id, vals|
      bibtype = vals['bibtype']
      raise ArgumentError, "citation '#{id}' has no bibtype" if bibtype.nil?
      vals['ident'] = id
      klass = vals.key?('pmid') ? PubMed : Citation.const_get(bibtype.capitalize)
      vals['bibtype'] = bibtype.to_sym
      klass.new(vals)
    end
    Bibliography.new(citations)
  end

  # Selects as internal citations only those matching the array of idents,
  # in the order the idents are given. Returns the citations.
  # Aborts the program if an ident cannot be found.
  def select_by_id!(ids)
    by_ident = {}
    # when several citations share an ident the last one wins
    @citations.each { |cit| by_ident[cit.ident] = cit }
    @citations = ids.map do |id|
      abort "Cannot find '#{id}' in citations!" unless by_ident.key?(id)
      by_ident[id]
    end
  end

  # Hashes by ident: returns ident => citation hash, where the citation hash
  # is the citation's own #to_hash without its 'ident' entry.
  def to_hash
    hsh = {}
    @citations.each do |cit|
      cthash = cit.to_hash
      cthash.delete('ident')
      hsh[cit.ident] = cthash
    end
    hsh
  end

  # Returns the YAML string for #to_hash; if given a file, also writes the
  # string to that file.
  def to_yaml(file=nil)
    string = to_hash.to_yaml
    File.open(file, 'w') { |fh| fh.print string } if file
    string
  end

  # a format_obj can respond to the call obj.format(citations)
  # and :header and :footer; the three results are concatenated.
  def write(format_obj)
    format_obj.header + format_obj.format(@citations) + format_obj.footer
  end

end
data/lib/bivy.rb ADDED
@@ -0,0 +1,4 @@
1
+
2
# Namespace holding the library version.
module Bivy
  # Version string, frozen so the constant cannot be mutated in place.
  # NOTE(review): the package listing this file ships in is labelled 0.0.5
  # while this constant says 0.0.3 -- confirm which one is authoritative.
  VERSION = '0.0.3'.freeze
end
data/lib/citation.rb ADDED
@@ -0,0 +1,194 @@
1
+
2
+ require 'journal'
3
+
4
# Base class for all citation types. Subclasses add their own attributes;
# the bibtype defaults to the downcased last part of the class name.
class Citation
  # quotes are direct or parenthetical
  attr_accessor :bibtype, :ident, :quotes, :abstract
  # authors should be an array of Author objects, or a string for an exact
  # line
  attr_reader :authors

  # hash:: optional attribute name => value pairs; each pair is applied by
  #        calling the matching "name=" writer, so unknown names raise
  #        NoMethodError.
  def initialize(hash=nil)
    @authors = nil
    @quotes = []
    # Citation::Article -> :article
    @bibtype = self.class.to_s.split('::')[-1].downcase.to_sym
    if hash
      hash.each { |name, value| send("#{name}=".to_sym, value) }
    end
  end

  # Returns a Hash of every instance variable that has been set, keyed by
  # the variable name without the leading '@'. 'bibtype' is converted to a
  # String. 'authors' is an Array of author strings when authors is an
  # Array; a String (exact author line) or nil is passed through unchanged.
  def to_hash
    hash = {}
    instance_variables.each do |var|
      # to_s: instance_variables yields Symbols on current Rubies
      hash[var.to_s[1..-1]] = instance_variable_get(var)
    end
    hash['bibtype'] = hash['bibtype'].to_s
    hash['authors'] = @authors.is_a?(Array) ? @authors.map { |auth| auth.to_s } : @authors
    hash
  end

  # given an array of strings or objects, ensures objects, given string it
  # will set as a string. Aborts the program if an array element is neither
  # a String nor a Citation::Author.
  def authors=(array)
    unless array.is_a? Array
      # this is a string (an exact author line); store it untouched
      @authors = array
      return
    end
    @authors = array.map do |auth|
      case auth
      when String then Citation::Author.from_s(auth)
      when Citation::Author then auth
      else abort "Don't recognize: #{auth.class} for #{auth}"
      end
    end
  end

  # make the yaml look like a hash
  def to_yaml
    to_hash.to_yaml
  end

end
74
+
75
# Mixin for citation types that carry a journal name in three spellings:
# medline, full and iso.
module JournalLike
  attr_accessor :journal_medline, :journal_full, :journal_iso

  # Given a medline format journal name, fills in the 3 journal attributes.
  #
  # @journal_medline is always set to +jrnl+. @journal_full and @journal_iso
  # are only touched when they are still nil: each is looked up in
  # Journal::Medline_to_Full / Journal::Medline_to_ISO and falls back to
  # +jrnl+ itself when the table has no entry for it.
  def set_journal_from_medline(jrnl)
    @journal_medline = jrnl
    if @journal_full.nil?
      @journal_full = Journal::Medline_to_Full.key?(jrnl) ? Journal::Medline_to_Full[jrnl] : jrnl
    end
    if @journal_iso.nil?
      @journal_iso = Journal::Medline_to_ISO.key?(jrnl) ? Journal::Medline_to_ISO[jrnl] : jrnl
    end
  end

  # true once a medline journal name has been set
  def has_journal?
    !journal_medline.nil?
  end

end
107
+
108
# A citation for a journal article.
class Citation::Article < Citation
  include JournalLike

  # Attributes compared by #== when this article has no pmid method.
  COMPARED_ATTRIBUTES = [:title, :year, :month, :vol, :issue, :pages, :journal_medline, :bibtype].freeze

  # ident = unique identifier for placing in papers
  attr_accessor :title, :year, :month, :vol, :issue, :pages

  # Two articles are equal when their pmids match, if self responds to
  # :pmid; otherwise when every attribute in COMPARED_ATTRIBUTES matches.
  # An +other+ that lacks one of the needed methods is simply not equal
  # (rather than raising NoMethodError).
  def ==(other)
    if respond_to?(:pmid)
      return other.respond_to?(:pmid) && pmid == other.pmid
    end
    COMPARED_ATTRIBUTES.all? do |att|
      other.respond_to?(att) && send(att) == other.send(att)
    end
  end

  # Expands an abbreviated page range so the end page has all its digits,
  # e.g. "123-29" becomes "123-129". Page strings containing a '.' and
  # ranges whose end is not numerically smaller than the start are returned
  # as they are, as is any @pages value that cannot be split (nil, Integer).
  def pages_full
    return @pages unless @pages.respond_to?(:split)
    st_p, end_p = @pages.split('-')
    if !@pages.include?('.') && end_p && end_p.to_i < st_p.to_i # 123-29
      diff = st_p.size - end_p.size
      # end page has more characters than the start page: nothing to borrow
      return @pages if diff < 0
      new_end_p = st_p[0,diff] + end_p
      [st_p, new_end_p].join('-')
    else
      @pages
    end
  end
end
141
+
142
+
143
# Citation with journal attributes (via JournalLike) and a title only;
# presumably for an article that has not been submitted yet.
class Citation::Article_to_be_submitted < Citation
  include JournalLike

  attr_accessor(:title)
end
147
+
148
# Citation carrying a title, a name, a year and pages.
class Citation::Workshop < Citation
  attr_accessor :title, :name
  attr_accessor :year, :pages
end
151
+
152
# Citation carrying a title, a publisher and a year.
class Citation::Book < Citation
  attr_accessor :title
  attr_accessor :publisher
  attr_accessor :year
end
155
+
156
# Citation for a web page.
class Citation::Webpage < Citation
  attr_accessor :title, :url

  # month, year, day are all for the creation of the media itself
  attr_accessor :year, :month, :day

  # date last accessed (String: 'yyyy-mm-dd')
  attr_accessor :accessed
end
162
+
163
+
164
# One author: a last name plus initials.
class Citation::Author

  ## INITIALS should be with NO spaces, all caps
  attr_reader :last, :initials

  def initialize(last, initials)
    @last = last
    @initials = initials
  end

  def inspect
    "<#{@last}, #{initials}>"
  end

  # "Last, INITIALS" -- the same form that .from_s parses
  def to_s
    "#{@last}, #{@initials}"
  end

  # Builds an Author from a "Last, INITIALS" string: the text before the
  # first ', ' is the last name, everything after it the initials (an empty
  # string when there is no ', ').
  # TODO: make this smarter for initials
  def self.from_s(string)
    last, *rest = string.split(', ')
    self.new(last, rest.join(', '))
  end

  # Equal when last name and initials both match. An +other+ without #last
  # and #initials is not equal (rather than raising NoMethodError).
  def ==(other)
    other.respond_to?(:last) && other.respond_to?(:initials) &&
      [self.last, self.initials] == [other.last, other.initials]
  end

  # eql? and hash agree with ==, so equal authors collapse in Array#uniq and
  # act as the same Hash key.
  def eql?(other)
    other.class == self.class && self == other
  end

  def hash
    [self.last, self.initials].hash
  end

end
193
+
194
+
data/lib/format.rb ADDED
@@ -0,0 +1,120 @@
1
+
2
# Shared behaviour for citation formats. Format.new loads and instantiates a
# concrete format class; the instance methods here are meant to be mixed
# into such a class. A format object holds a media object (@media_obj) that
# does the actual markup, and the citation currently being formatted (@cit).
module Format
  # format symbol => class name, for formats whose class name is not simply
  # the capitalized symbol
  Symbol_to_class_string = { }

  # method names that are forwarded to the media object by method_missing
  MediaForwarding = {
    :i => true,
    :b => true,
    :u => true,
    :header => true,
    :footer => true,
    :periodize => true,
    :par => true,
    :br => true,
  }

  # Requires "format/<tp>" and returns a new instance of the matching
  # Format::<Class>, built around +media_obj+.
  def self.new(media_obj, tp=:jtp)
    require "format/#{tp}"
    klass_st = Symbol_to_class_string[tp] || tp.to_s.capitalize
    Format.const_get(klass_st).new(media_obj)
  end

  # Forwards the names listed in MediaForwarding to the media object and any
  # other name the current citation responds to to that citation (a given
  # block is passed along). Raises NoMethodError for everything else.
  def method_missing(*args, &block)
    meth = args.first
    if MediaForwarding.key?(meth)
      @media_obj.send(*args, &block)
    elsif @cit and @cit.respond_to?(meth)
      @cit.send(*args, &block)
    else
      raise NoMethodError, "method '#{meth}' called with args (#{args[1..-1].join(',')})"
    end
  end

  # Keeps respond_to? in agreement with what method_missing will accept.
  def respond_to_missing?(meth, include_private=false)
    MediaForwarding.key?(meth) || (!@cit.nil? && @cit.respond_to?(meth)) || super
  end

  def initialize(media_obj)
    @media_obj = media_obj
    @cit = nil
  end

  # Appends +punc+ to every character of +initials+: "JT" -> "J.T."
  def punctuate_initials(initials, punc='.')
    initials.to_s.split('').map { |letter| letter + punc }.join('')
  end

  # Formats each citation by calling the method named after its bibtype
  # (e.g. #article), finishes the result and hands the list of strings to
  # the media object's #list. While a citation is being formatted it is
  # available through @cit (and therefore through method_missing).
  def format(cits)
    as_strings = cits.map do |cit|
      @cit = cit
      finish(send(@cit.bibtype))
    end
    @media_obj.list(as_strings)
  end

  # if given an array, will finish it with compaction and periodizing
  # (via the media object) and join the pieces with spaces;
  # otherwise, won't touch it
  def finish(arg)
    if arg.is_a? Array
      periodize(arg.compact).join(' ')
    else
      arg
    end
  end

  # Returns the author list of the current citation as one string.
  # Probably only the first argument would you ever change.
  #
  # A String author line is returned untouched. Otherwise every author is
  # rendered as last name + separate_last_and_initials + punctuated
  # initials, and then:
  # * if delim is nil, et al. format is used: 1 author as is, 2 authors
  #   connected with and_word, 3 or more authors = first author + 'et al.'
  # * otherwise the names are joined with delim; with join_with_ands the
  #   last name is attached with and_word instead (only when there are at
  #   least two names, so a single author gets no stray and_word).
  # An empty author list gives ''.
  def author_list(after_initials='.', separate_last_and_initials=' ', delim=", ", and_word="and", join_with_ands=false)
    return authors if authors.is_a? String
    names = authors.map do |auth|
      auth.last + separate_last_and_initials + punctuate_initials(auth.initials, after_initials)
    end
    return '' if names.empty?
    if delim.nil?
      case names.size
      when 1
        names.first
      when 2
        names.join(" #{and_word} ")
      else
        names.first + ' ' + i('et al.')
      end
    elsif join_with_ands && names.size > 1
      names[0...-1].join(delim) + " #{and_word} " + names[-1]
    else
      names.join(delim)
    end
  end

  #############################
  # universal format methods
  #############################

  # parenthesizes any 'true' object that has to_s method, otherwise ''
  def par(st)
    if st
      "(#{st})"
    else
      ''
    end
  end

end
119
+
120
+