bivy 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +21 -0
- data/History +16 -0
- data/LICENSE +29 -0
- data/README.rdoc +37 -0
- data/Rakefile +43 -0
- data/TODO +12 -0
- data/VERSION +1 -0
- data/bin/bivy.rb +56 -0
- data/bin/pubmed_to_bivy.rb +78 -0
- data/doc/config.yaml +13 -0
- data/doc/src/default.css +126 -0
- data/doc/src/default.template +38 -0
- data/doc/src/tutorial/basic_flow.page +71 -0
- data/doc/src/tutorial/index.page +8 -0
- data/doc/src/tutorial/new_formats_and_media.page +83 -0
- data/jrn_abbrev/abbr_html.tgz +0 -0
- data/jrn_abbrev/abbr_to_journal.yaml +676 -0
- data/jrn_abbrev/download_abbrevs.rb +20 -0
- data/jrn_abbrev/for_ruby_class.rb +686 -0
- data/jrn_abbrev/html_to_yaml.rb +50 -0
- data/lib/bibliography.rb +144 -0
- data/lib/bivy.rb +4 -0
- data/lib/citation.rb +194 -0
- data/lib/format.rb +120 -0
- data/lib/format/acs.rb +88 -0
- data/lib/format/bioinformatics.rb +33 -0
- data/lib/format/bmc.rb +38 -0
- data/lib/format/jtp.rb +30 -0
- data/lib/format/mla.rb +50 -0
- data/lib/formatter.rb +276 -0
- data/lib/journal.rb +6 -0
- data/lib/journal/iso_to_full.yaml +1320 -0
- data/lib/journal/medline_to_full.yaml +7 -0
- data/lib/journal/medline_to_iso.yaml +45 -0
- data/lib/media.rb +88 -0
- data/lib/media/html.rb +65 -0
- data/lib/ooffice.rb +39 -0
- data/lib/pubmed.rb +209 -0
- data/lib/rtf.rb +217 -0
- data/old_stuff/old_list2refs.rb +103 -0
- data/old_stuff/pubmed2html.rb +119 -0
- data/old_stuff/pubmed_bib_write.rb +92 -0
- data/old_stuff/xml.tmp.xml +115 -0
- data/scripts/merge_bibs.rb +70 -0
- data/spec/bibliography_spec.rb +127 -0
- data/spec/citation_positions.odt +0 -0
- data/spec/formatter_spec.rb +14 -0
- data/spec/formatter_spec/cits_after.xml +2 -0
- data/spec/formatter_spec/cits_before.xml +2 -0
- data/spec/formatter_spec/content.xml +2 -0
- data/spec/ooffice_spec.rb +27 -0
- data/spec/pubmed_spec.rb +26 -0
- data/spec/spec_helper.rb +7 -0
- data/spec/testfiles/doc1.odt +0 -0
- metadata +136 -0
@@ -0,0 +1,50 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
|
4
|
+
# Renders +hash+ as Ruby source text that assigns a hash literal to the
# constant Medline_to_Full, one "'key' => 'value'," entry per line, with
# entries ordered by Hash#sort.
#
# Keys and values are converted with to_s and emitted as single-quoted
# literals; embedded single quotes and backslashes are escaped so that names
# such as "Women's Health" still produce loadable Ruby.
#
# Returns the generated source as a String (no trailing newline).
def to_hash_string(hash)
  # Inside a single-quoted Ruby literal only \' and \\ are escape sequences.
  quote = lambda do |obj|
    "'" + obj.to_s.gsub(/[\\']/) { |c| "\\" + c } + "'"
  end

  lines = ["Medline_to_Full = {"]
  hash.sort.each do |k, v|
    lines << "#{quote.call(k)} => #{quote.call(v)},"
  end
  lines << "}"
  lines.join("\n")
end
|
14
|
+
|
15
|
+
|
16
|
+
# Abbreviation => full journal name, accumulated over every *.html file in
# the current directory. A later match for the same abbreviation overwrites
# an earlier one.
a_to_j = {} ## should be uniq mapping here

# Cleans one captured table cell: drops <i> and <A ...> tags, trims
# whitespace, removes a trailing " (...)" note and, last, cuts everything
# from a stray "<TR><TD>" onwards.
# The last sub is because of some bad html they have in their journal
# ...<TR><TD>Annu Rev Plant Physiol Plant Mol Biol
clean_cell = lambda do |cell|
  cell.gsub(/<\/?i>/,'').gsub(/<\/?A.*?>/,'').strip.sub(/ \(.*\)$/,'').sub(/<TR><TD>.*$/, '')
end

# The two row layouts found in the pages; they are applied in this order so
# that matches from the second layout win on duplicate abbreviations.
row_patterns = [
  /<TR><TD>(.*?)<\/TD><TD>(.*?)<\/TD>/m,
  /<TR>\n<TD>(.*?)<\/TD>\n<TD>(.*?)<\/TD>/
]

Dir["*.html"].each do |file|
  puts "FILE: #{file}"
  num_matches = 0
  html = IO.read(file) # read once and reuse for both layouts
  row_patterns.each do |pattern|
    html.scan(pattern) do |match|
      abbrev, full = match.map(&clean_cell)
      num_matches += 1
      #if a_to_j.key? abbrev ; puts "ALREADY HAVE KEY! " end
      a_to_j[abbrev] = full
    end
  end

  puts "#{num_matches} MATHCEES"
end

# Write the collected mapping out as Ruby source.
File.open("for_ruby_class.rb", 'w') do |fh|
  fh.print( to_hash_string(a_to_j))
end
|
data/lib/bibliography.rb
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'citation'
|
2
|
+
require 'pubmed'
|
3
|
+
|
4
|
+
# An ordered collection of citation objects. Every citation is expected to
# respond to +ident+ (its unique identifier) and, for serialization, to
# +to_hash+.
class Bibliography

  attr_accessor :citations

  # citations:: optional array of citation objects. Defaults to an empty
  #             array so that #add, #to_hash etc. work on a new bibliography.
  def initialize(citations=nil)
    @citations = citations || []
  end

  # Returns an array of the citations from +other+ that are not unique
  # compared to self: those whose ident also occurs in self, followed by
  # those whose (non-nil) pmid also occurs in self. Each citation object is
  # listed once.
  def not_uniq(other)
    scit = self.citations
    ocit = other.citations
    dups = not_uniq_by(scit, ocit, :ident)
    # only citations that actually carry a pmid take part in the pmid pass
    self_pm, other_pm = [scit, ocit].map do |ar|
      ar.select {|v| v.respond_to?(:pmid) && !v.pmid.nil? }
    end
    dups.concat( not_uniq_by(self_pm, other_pm, :pmid) )
    dups.uniq
  end

  # Returns the members of +cits2+ whose value for attribute +att+ (a
  # Symbol) also occurs among +cits1+. The result is grouped by attribute
  # value, in order of first appearance in +cits2+.
  def not_uniq_by(cits1, cits2, att)
    self_by_att = cits1.group_by(&att)
    cits2.group_by(&att).each_with_object([]) do |(k, v), not_un|
      not_un.concat(v) if self_by_att.key? k
    end
  end

  # adds a list of citations.  It will ONLY add citations whose identifiers
  # do not already exist (including identifiers added earlier in the same
  # call).  Citations which already have a duplicate identifier will be
  # returned.  nil is returned if no citation objects have clashing id's
  def add(*citations)
    known = {}
    @citations.each {|cit| known[cit.ident] = true }
    clashing = []
    citations.each do |cit|
      if known.key? cit.ident
        clashing << cit
      else
        known[cit.ident] = true
        @citations.push(cit)
      end
    end
    clashing.empty? ? nil : clashing
  end

  # Builds a Bibliography from YAML. If +file_or_string+ names an existing
  # file it is loaded, otherwise the argument itself is parsed as YAML.
  # The YAML is expected to be a hash of ident => attribute hash; each
  # attribute hash needs a 'bibtype'. Entries with a 'pmid' key become
  # PubMed objects, all others Citation::<Bibtype> objects.
  #
  # NOTE(review): YAML.load on untrusted input can be unsafe on older
  # Ruby/Psych versions -- only feed this trusted bibliography files.
  def self.from_yaml(file_or_string)
    hash =
      if File.exist? file_or_string
        YAML.load_file(file_or_string)
      else
        YAML.load(file_or_string)
      end
    # we were given a nonexistent file and the yaml is not a hash
    # in this case we need to create an empty bib object
    hash = {} unless hash.is_a? Hash
    citations = hash.map do |id,vals|
      vals['ident'] = id
      bibtype = vals['bibtype']
      klass = vals.key?('pmid') ? PubMed : Citation.const_get(bibtype.capitalize)
      vals['bibtype'] = bibtype.to_sym
      klass.new(vals)
    end
    Bibliography.new(citations)
  end

  # selects as internal citations only those matching the array of idents
  # (in the order given) and returns the citations. Aborts the program if
  # an ident cannot be found.
  def select_by_id!(ids)
    # index by ident; when idents repeat, the last citation wins
    by_ident = {}
    @citations.each {|cit| by_ident[cit.ident] = cit }
    @citations = ids.map do |id|
      abort "Cannot find '#{id}' in citations!" unless by_ident.key? id
      by_ident[id]
    end
  end

  # hashes by ident: returns ident => citation attribute hash (without the
  # 'ident' entry itself)
  def to_hash
    hsh = {}
    @citations.each do |cit|
      cthash = cit.to_hash
      cthash.delete('ident')
      hsh[cit.ident] = cthash
    end
    hsh
  end

  # if given a file, writes the YAML to the file; the YAML string is
  # returned in either case
  def to_yaml(file=nil)
    string = to_hash.to_yaml
    if file
      File.open(file, 'w') {|v| v.print string }
    end
    string
  end

  # a format_obj can respond to the call obj.format(citations)
  # and :header and :footer; returns header + formatted citations + footer
  def write(format_obj)
    format_obj.header + format_obj.format(@citations) + format_obj.footer
  end

end
|
data/lib/bivy.rb
ADDED
data/lib/citation.rb
ADDED
@@ -0,0 +1,194 @@
|
|
1
|
+
|
2
|
+
require 'journal'
|
3
|
+
|
4
|
+
# Base class for all citation types. Attributes are plain instance
# variables; a citation can be built from, and turned back into, a hash of
# attribute name => value.
class Citation
  # quotes are direct or parenthetical
  attr_accessor :bibtype, :ident, :quotes, :abstract
  # authors should be an array of Author objects, or a string for an exact
  # line
  attr_reader :authors

  # hash:: optional attribute name => value pairs; each pair is applied
  #        through the matching "name=" writer, so unknown names raise
  #        NoMethodError.
  def initialize(hash=nil)
    @authors = nil
    @quotes = []
    # Citation::Article -> :article
    @bibtype = self.class.to_s.split('::')[-1].downcase.to_sym
    if hash
      hash.each do |x,v|
        send("#{x}=".to_sym, v)
      end
    end
  end

  # Returns a hash of attribute name (String, without the '@') => value for
  # every instance variable that is set. 'bibtype' is converted to a String.
  # 'authors' is an array of author strings when authors is an Array, and
  # is passed through unchanged when it is a String (exact line) or nil.
  def to_hash
    hash = {}
    # instance_variables yields Symbols on Ruby >= 1.9 and Strings before
    instance_variables.each do |var|
      hash[var.to_s[1..-1]] = instance_variable_get(var)
    end
    hash['bibtype'] = hash['bibtype'].to_s
    hash['authors'] =
      if @authors.is_a? Array
        @authors.map {|v| v.to_s }
      else
        @authors
      end
    hash
  end

  # given an array of strings or objects, ensures objects (strings are
  # parsed with Citation::Author.from_s); given a string it will set it as a
  # string.  Aborts the program on an array element of any other class.
  def authors=(array)
    @authors =
      if array.is_a? Array
        array.map do |auth|
          case auth
          when String
            Citation::Author.from_s(auth)
          when Citation::Author
            auth
          else
            abort "Don't recognize: #{auth.class} for #{auth}"
          end
        end
      else
        # this is a string
        array
      end
  end

  # make the yaml look like a hash
  def to_yaml
    to_hash.to_yaml
  end

end
|
74
|
+
|
75
|
+
# Mixin for citations that appear in a journal. Holds the journal name in
# three forms: Medline abbreviation, full name and ISO abbreviation.
module JournalLike
  attr_accessor :journal_medline, :journal_full, :journal_iso

  # given a medline format journal name, fills in the 3 journal attributes.
  # @journal_medline is always set to +jrnl+. @journal_full and @journal_iso
  # are only filled in when they are still nil: the name is looked up in
  # Journal::Medline_to_Full / Journal::Medline_to_ISO, and +jrnl+ itself is
  # used when there is no entry.
  def set_journal_from_medline(jrnl)
    @journal_medline = jrnl
    @journal_full = Journal::Medline_to_Full.fetch(jrnl, jrnl) if @journal_full.nil?
    @journal_iso = Journal::Medline_to_ISO.fetch(jrnl, jrnl) if @journal_iso.nil?
  end

  # true once a medline journal name has been set
  def has_journal?
    !journal_medline.nil?
  end

end
|
107
|
+
|
108
|
+
# A published journal article.
class Citation::Article < Citation
  include JournalLike
  # ident = unique identifier for placing in papers
  attr_accessor :title, :year, :month, :vol, :issue, :pages

  # Two articles are equal when their pmids match (if self has a pmid
  # reader); otherwise when title, year, month, vol, issue, pages,
  # journal_medline and bibtype all match. Objects that do not offer the
  # compared attributes are simply not equal.
  def ==(other)
    if self.respond_to? :pmid
      return other.respond_to?(:pmid) && (self.pmid == other.pmid)
    end
    %w(title year month vol issue pages journal_medline bibtype).all? do |v|
      other.respond_to?(v) && self.send(v.to_sym) == other.send(v.to_sym)
    end
  end

  # Expands an abbreviated end page: '123-29' becomes '123-129'. Pages that
  # contain a '.', have no end page, are not abbreviated, or are not a
  # String (e.g. nil) are returned unchanged.
  def pages_full
    return @pages unless @pages.is_a?(String) && !@pages.include?('.')
    st_p, end_p = @pages.split('-')
    return @pages unless st_p && end_p && end_p.to_i < st_p.to_i # 123-29
    diff = st_p.size - end_p.size
    # an end page longer than the start page cannot be an abbreviation
    return @pages if diff < 0
    [st_p, st_p[0,diff] + end_p].join('-')
  end
end
|
141
|
+
|
142
|
+
|
143
|
+
# An article that has not been submitted yet: it has a title and the
# journal attributes from JournalLike, but no volume/page details.
class Citation::Article_to_be_submitted < Citation
  include JournalLike

  attr_accessor :title
end
|
147
|
+
|
148
|
+
# A workshop contribution.
class Citation::Workshop < Citation
  attr_accessor :title
  attr_accessor :name
  attr_accessor :year
  attr_accessor :pages
end
|
151
|
+
|
152
|
+
# A book.
class Citation::Book < Citation
  attr_accessor :title
  attr_accessor :publisher
  attr_accessor :year
end
|
155
|
+
|
156
|
+
# A web page.
class Citation::Webpage < Citation
  attr_accessor :title, :url

  # month, year, day are all for the creation of the media itself
  attr_accessor :year, :month, :day

  # date last accessed (String: 'yyyy-mm-dd')
  attr_accessor :accessed
end
|
162
|
+
|
163
|
+
|
164
|
+
# One author: a last name plus initials.
class Citation::Author

  ## INITIALS should be with NO spaces, all caps
  attr_reader :last, :initials

  def initialize(last, initials)
    @last = last
    @initials = initials
  end

  def inspect
    "<#{@last}, #{initials}>"
  end

  # "Last, INITIALS" -- the inverse of Author.from_s
  def to_s
    "#{@last}, #{@initials}"
  end

  # Parses "Last, INITIALS": everything before the first ', ' is the last
  # name, the remainder (re-joined with ', ') the initials.
  # TODO: make this smarter for initials
  def self.from_s(string)
    last, *rest = string.split(', ')
    self.new(last, rest.join(', '))
  end

  # Authors are equal when last name and initials both match. Objects that
  # do not offer last/initials (nil, plain strings, ...) are not equal
  # rather than raising NoMethodError.
  def ==(other)
    other.respond_to?(:last) && other.respond_to?(:initials) &&
      [self.last, self.initials] == [other.last, other.initials]
  end

end
|
193
|
+
|
194
|
+
|
data/lib/format.rb
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
|
2
|
+
# Mixin and factory for citation formats.
#
# Format.new(media_obj, :jtp) requires "format/jtp" and instantiates the
# matching class inside this module. The instance methods below are meant to
# be mixed into such a class: they hold a media object (which does the
# markup) and the citation currently being formatted, and forward unknown
# calls to one of the two.
module Format
  # optional format symbol => class name overrides; by default the class
  # name is the capitalized symbol
  Symbol_to_class_string = { }

  # methods that are passed straight on to the media object
  MediaForwarding = {
    :i => true,
    :b => true,
    :u => true,
    :header => true,
    :footer => true,
    :periodize => true,
    :par => true,
    :br => true,
  }

  # Loads the format file for +tp+ and returns a new formatter object of
  # that type wrapping +media_obj+.
  def self.new(media_obj, tp=:jtp)
    require "format/#{tp}"
    klass_st = Symbol_to_class_string[tp] || tp.to_s.capitalize
    Format.const_get(klass_st).new(media_obj)
  end

  # Forwards markup methods (see MediaForwarding) to the media object and
  # any method the current citation responds to to that citation; anything
  # else raises NoMethodError.
  def method_missing(*args, &block)
    meth = args.first
    if MediaForwarding.key?(meth)
      @media_obj.send(*args, &block)
    elsif @cit && @cit.respond_to?(meth)
      @cit.send(*args, &block)
    else
      raise NoMethodError, "method '#{meth}' called with args (#{args[1..-1].join(',')})"
    end
  end

  # Keeps respond_to? in step with method_missing.
  def respond_to_missing?(meth, include_private = false)
    MediaForwarding.key?(meth) || (!@cit.nil? && @cit.respond_to?(meth)) || super
  end

  def initialize(media_obj)
    @media_obj = media_obj
    @cit = nil
  end

  # NOTE: periodize is not defined here; it is forwarded to the media
  # object. It should take an array of strings, each formatted in whatever
  # method, and ensure that each string ends in a period. This is annoying
  # to define, but it simplifies the writing of citation formats
  # dramatically.

  # 'JT' -> 'J.T.' (each character followed by +punc+); nil gives ''
  def punctuate_initials(initials, punc='.')
    initials.to_s.split('').map { |i| i + punc }.join('')
  end

  # Formats every citation by calling the method named after its bibtype
  # and hands the resulting strings to the media object's list method.
  def format(cits)
    as_strings = cits.map do |cit|
      @cit = cit
      finish(send(@cit.bibtype))
    end
    @media_obj.list(as_strings)
  end

  # if given an array, will finish it with compaction and periodizing
  # (joined with single spaces); otherwise, won't touch it
  def finish(arg)
    if arg.is_a? Array
      periodize(arg.compact).join(' ')
    else
      arg
    end
  end

  # Returns the author line of the current citation.
  # probably only the first argument would you ever change
  # if delim is nil, then et al. format is used (1 author, fine, 2 authors
  # connect with and_word, 3 or more authors = first author + et al.)
  # With join_with_ands the last of several authors is attached with
  # and_word instead of delim. A String authors value is returned as is and
  # an empty author list gives ''.
  def author_list(after_initials='.', separate_last_and_initials=' ', delim=", ", and_word="and", join_with_ands=false)
    auths = authors
    return auths if auths.is_a? String
    names = auths.map do |auth|
      auth.last + separate_last_and_initials + punctuate_initials(auth.initials, after_initials)
    end
    if delim.nil?
      case names.size
      when 0
        ''
      when 1
        names.first
      when 2
        names.join(" #{and_word} ")
      else
        names.first + ' ' + i('et al.')
      end
    elsif join_with_ands && names.size > 1
      names[0...-1].join(delim) + " #{and_word} " + names[-1]
    else
      names.join(delim)
    end
  end

  #############################
  # universal format methods
  #############################

  # parenthesizes any 'true' object that has to_s method, otherwise ''
  def par(st)
    if st
      "(#{st})"
    else
      ''
    end
  end

end
|
119
|
+
|
120
|
+
|