bivy 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +21 -0
- data/History +16 -0
- data/LICENSE +29 -0
- data/README.rdoc +37 -0
- data/Rakefile +43 -0
- data/TODO +12 -0
- data/VERSION +1 -0
- data/bin/bivy.rb +56 -0
- data/bin/pubmed_to_bivy.rb +78 -0
- data/doc/config.yaml +13 -0
- data/doc/src/default.css +126 -0
- data/doc/src/default.template +38 -0
- data/doc/src/tutorial/basic_flow.page +71 -0
- data/doc/src/tutorial/index.page +8 -0
- data/doc/src/tutorial/new_formats_and_media.page +83 -0
- data/jrn_abbrev/abbr_html.tgz +0 -0
- data/jrn_abbrev/abbr_to_journal.yaml +676 -0
- data/jrn_abbrev/download_abbrevs.rb +20 -0
- data/jrn_abbrev/for_ruby_class.rb +686 -0
- data/jrn_abbrev/html_to_yaml.rb +50 -0
- data/lib/bibliography.rb +144 -0
- data/lib/bivy.rb +4 -0
- data/lib/citation.rb +194 -0
- data/lib/format.rb +120 -0
- data/lib/format/acs.rb +88 -0
- data/lib/format/bioinformatics.rb +33 -0
- data/lib/format/bmc.rb +38 -0
- data/lib/format/jtp.rb +30 -0
- data/lib/format/mla.rb +50 -0
- data/lib/formatter.rb +276 -0
- data/lib/journal.rb +6 -0
- data/lib/journal/iso_to_full.yaml +1320 -0
- data/lib/journal/medline_to_full.yaml +7 -0
- data/lib/journal/medline_to_iso.yaml +45 -0
- data/lib/media.rb +88 -0
- data/lib/media/html.rb +65 -0
- data/lib/ooffice.rb +39 -0
- data/lib/pubmed.rb +209 -0
- data/lib/rtf.rb +217 -0
- data/old_stuff/old_list2refs.rb +103 -0
- data/old_stuff/pubmed2html.rb +119 -0
- data/old_stuff/pubmed_bib_write.rb +92 -0
- data/old_stuff/xml.tmp.xml +115 -0
- data/scripts/merge_bibs.rb +70 -0
- data/spec/bibliography_spec.rb +127 -0
- data/spec/citation_positions.odt +0 -0
- data/spec/formatter_spec.rb +14 -0
- data/spec/formatter_spec/cits_after.xml +2 -0
- data/spec/formatter_spec/cits_before.xml +2 -0
- data/spec/formatter_spec/content.xml +2 -0
- data/spec/ooffice_spec.rb +27 -0
- data/spec/pubmed_spec.rb +26 -0
- data/spec/spec_helper.rb +7 -0
- data/spec/testfiles/doc1.odt +0 -0
- metadata +136 -0
@@ -0,0 +1,50 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
|
4
|
+
# Outputs a string that ruby will read in as a hash assigned to the constant
# Medline_to_Full.
#
# hash:: a Hash whose keys and values are interpolated as strings; its
#        pairs are emitted in sorted order, one per line.
#
# Returns the generated Ruby source as a single String (no trailing newline).
def to_hash_string(hash)
  # Keys and values are written inside single-quoted literals, so the only
  # characters needing an escape are the backslash and the single quote.
  quote = lambda { |obj| "'" + obj.to_s.gsub(/[\\']/) { |ch| "\\#{ch}" } + "'" }

  lines = ["Medline_to_Full = {"]
  hash.sort.each do |k, v|
    lines << "#{quote.call(k)} => #{quote.call(v)},"
  end
  lines << "}"
  lines.join("\n")
end
|
14
|
+
|
15
|
+
|
16
|
+
# Accumulates first-column => second-column pairs scraped from the table rows
# of every *.html file in the current directory (presumably journal
# abbreviation => journal name, going by the variable name).
a_to_j = {} ## should be uniq mapping here

# Cleans one table cell: drops <i> and <A ...> tags, surrounding whitespace
# and a trailing parenthesised remark.
# The last sub is because of some bad html they have in their journal
# ...<TR><TD>Annu Rev Plant Physiol Plant Mol Biol
tidy_cell = lambda do |cell|
  cell.gsub(/<\/?i>/,'').gsub(/<\/?A.*?>/,'').strip.sub(/ \(.*\)$/,'').sub(/<TR><TD>.*$/, '')
end

# Two row layouts are scanned, in this order: cells on a single line, and
# one cell per line.
row_patterns = [
  /<TR><TD>(.*?)<\/TD><TD>(.*?)<\/TD>/m,
  /<TR>\n<TD>(.*?)<\/TD>\n<TD>(.*?)<\/TD>/,
]

Dir["*.html"].each do |file|
  puts "FILE: #{file}"
  html = IO.read(file)
  num_matches = 0
  row_patterns.each do |pattern|
    html.scan(pattern) do |cells|
      first, second = cells.map(&tidy_cell)
      num_matches += 1
      #if a_to_j.key? first ; puts "ALREADY HAVE KEY! " end
      a_to_j[first] = second
    end
  end

  puts "#{num_matches} MATHCEES"
end

# Write the collected mapping out as Ruby source.
File.open("for_ruby_class.rb", 'w') { |out| out.print( to_hash_string(a_to_j)) }
|
data/lib/bibliography.rb
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'citation'
|
2
|
+
require 'pubmed'
|
3
|
+
|
4
|
+
# An ordered list of citation objects addressed by each citation's +ident+.
#
# Citations are duck-typed: this class only calls +ident+, +to_hash+ and
# (where available) +pmid+ on them.
class Bibliography

  # the Array of citation objects held by this bibliography
  attr_accessor :citations

  # citations:: optional Array of citation objects. An empty Array is used
  #             when none is given so that #add and #to_hash work on a
  #             freshly created bibliography.
  def initialize(citations=nil)
    @citations = citations || []
  end

  # returns an array of citations from other that are not uniq compared self
  #
  # A citation of +other+ counts as not unique when its ident matches an
  # ident in self, or when both sides respond to +pmid+ and share a non-nil
  # pmid. Each clashing citation is reported once.
  def not_uniq(other)
    scit = citations
    ocit = other.citations
    clashing = not_uniq_by(scit, ocit, :ident)
    self_pm, other_pm = [scit, ocit].map do |cits|
      # a nil pmid carries no identity, so it must not match another nil
      cits.select { |cit| cit.respond_to?(:pmid) && !cit.pmid.nil? }
    end
    clashing.concat(not_uniq_by(self_pm, other_pm, :pmid))
    clashing.uniq
  end

  # Returns the members of +cits2+ whose +att+ value also occurs as the
  # +att+ value of some member of +cits1+.
  #
  # att:: Symbol naming the attribute method to compare by
  def not_uniq_by(cits1, cits2, att)
    self_by_att = cits1.group_by(&att)
    not_un = []
    cits2.group_by(&att).each do |key, group|
      not_un.concat(group) if self_by_att.key? key
    end
    not_un
  end

  # adds a list of citations.  It will ONLY add citations whose identifiers
  # do not already exist (either in this bibliography or earlier in the same
  # call).  Citations which already have a duplicate identifier
  # will be returned.  nil is returned if no citation objects have clashing
  # id's
  def add(*citations)
    # Only the idents are needed here, so the citations are not serialized.
    seen = {}
    @citations.each { |cit| seen[cit.ident] = true }
    clashing = []
    citations.each do |cit|
      if seen.key? cit.ident
        clashing << cit
      else
        seen[cit.ident] = true
        @citations.push(cit)
      end
    end
    clashing.empty? ? nil : clashing
  end

  # Builds a Bibliography from YAML holding a hash of ident => attributes.
  # if file, loads it; otherwise the argument itself is parsed as YAML.
  #
  # Entries with a 'pmid' key become PubMed objects; every other entry
  # becomes an instance of the Citation class named by its capitalized
  # 'bibtype' (e.g. 'book' -> Citation::Book).
  #
  # NOTE(review): YAML.load / YAML.load_file should only be given trusted
  # input; consider YAML.safe_load if bibliographies can come from elsewhere.
  def self.from_yaml(file_or_string)
    hash =
      if File.exist? file_or_string
        YAML.load_file(file_or_string)
      else
        YAML.load(file_or_string)
      end
    # we were given a nonexistent file and the yaml is not a hash
    # in this case we need to create an empty bib object
    hash = {} unless hash.is_a? Hash
    citations = hash.map do |id, vals|
      vals['ident'] = id
      bibtype = vals['bibtype']
      klass = vals.key?('pmid') ? PubMed : Citation.const_get(bibtype.capitalize)
      vals['bibtype'] = bibtype.to_sym
      klass.new(vals)
    end
    Bibliography.new(citations)
  end

  # selects as internal citations only those matching the array of idents
  # (in the order of +ids+) and returns the citations.  Aborts the program
  # when an ident is not present.
  def select_by_id!(ids)
    by_ident = {}
    # when an ident occurs more than once the last citation wins
    @citations.each { |cit| by_ident[cit.ident] = cit }
    @citations = ids.map do |id|
      unless by_ident.key? id ; abort "Cannot find '#{id}' in citations!" end
      by_ident[id]
    end
  end

  # hashes by ident: returns ident => citation hash (without its 'ident' key)
  def to_hash
    hsh = {}
    @citations.each do |cit|
      cthash = cit.to_hash
      cthash.delete('ident')
      hsh[cit.ident] = cthash
    end
    hsh
  end

  # if given a file, writes to the file; the yaml string is returned either way
  def to_yaml(file=nil)
    string = to_hash.to_yaml
    if file
      File.open(file, 'w') {|fh| fh.print string }
    end
    string
  end

  # a format_obj can respond to the call obj.format(citations)
  # and :header and :footer.  Returns header + formatted citations + footer.
  def write(format_obj)
    format_obj.header + format_obj.format(@citations) + format_obj.footer
  end

end
|
data/lib/bivy.rb
ADDED
data/lib/citation.rb
ADDED
@@ -0,0 +1,194 @@
|
|
1
|
+
|
2
|
+
require 'journal'
|
3
|
+
|
4
|
+
# Base class for all citation types.  Attributes are filled in from a hash
# by calling the matching "name=" setter for every key.
class Citation
  # quotes are direct or parenthetical (the original note read "director")
  attr_accessor :bibtype, :ident, :quotes, :abstract
  # authors should be an array of Author objects, or a string for an exact
  # line
  attr_reader :authors

  # hash:: optional Hash of attribute name => value; each pair is applied
  #        through the corresponding setter, so unknown keys raise
  #        NoMethodError.
  def initialize(hash=nil)
    @authors = nil
    @quotes = []
    # Citation::Article -> :article
    @bibtype = self.class.to_s.split('::')[-1].downcase.to_sym
    if hash
      hash.each { |attr, val| send("#{attr}=", val) }
    end
  end

  # Returns a Hash of instance variable name (without '@') => value.
  # 'bibtype' is given as a String.  'authors' is an Array of author strings
  # when authors is an Array; a String or nil is passed through unchanged.
  def to_hash
    hash = {}
    instance_variables.each do |var|
      # ivar names are Symbols on ruby >= 1.9 and Strings before that
      hash[var.to_s[1..-1]] = instance_variable_get(var)
    end
    hash['bibtype'] = hash['bibtype'].to_s
    hash['authors'] = @authors.is_a?(Array) ? @authors.map {|v| v.to_s } : @authors
    hash
  end

  # given an array of strings or objects, ensures objects, given string it
  # will set as a string.  Aborts the program on an array element that is
  # neither a String nor a Citation::Author.
  def authors=(array)
    @authors =
      if array.is_a? Array
        array.map do |auth|
          case auth
          when String then Citation::Author.from_s(auth)
          when Citation::Author then auth
          else abort "Don't recognize: #{auth.class} for #{auth}"
          end
        end
      else
        # this is a string
        array
      end
  end

  # make the yaml look like a hash
  def to_yaml
    to_hash.to_yaml
  end

end
|
74
|
+
|
75
|
+
# Mixin holding the three spellings of a journal name: the medline form, the
# full form and the iso form.
module JournalLike
  attr_accessor :journal_medline, :journal_full, :journal_iso

  # given a medline format journal name, fills in the 3 journal attributes.
  #
  # @journal_medline is always set to +jrnl+.  @journal_full and
  # @journal_iso are only touched while they are still nil: each is looked
  # up in Journal::Medline_to_Full / Journal::Medline_to_ISO and falls back
  # to +jrnl+ itself when the table has no entry.
  def set_journal_from_medline(jrnl)
    @journal_medline = jrnl
    @journal_full = Journal::Medline_to_Full.fetch(jrnl, jrnl) if @journal_full.nil?
    @journal_iso = Journal::Medline_to_ISO.fetch(jrnl, jrnl) if @journal_iso.nil?
  end

  # true once a medline journal name has been set
  def has_journal?
    !journal_medline.nil?
  end

end
|
107
|
+
|
108
|
+
# A journal article citation.
class Citation::Article < Citation
  include JournalLike
  # ident = unique identifier for placing in papers
  attr_accessor :title, :year, :month, :vol, :issue, :pages

  # Equality test.  When self responds to +pmid+, two citations are equal
  # only if +other+ also responds to +pmid+ and the pmids match.  Otherwise
  # title, year, month, vol, issue, pages, journal_medline and bibtype must
  # all match.  An +other+ lacking any of those methods (nil, a different
  # citation type, ...) is simply not equal.
  def ==(other)
    if respond_to? :pmid
      return other.respond_to?(:pmid) && (pmid == other.pmid)
    end
    %w(title year month vol issue pages journal_medline bibtype).all? do |field|
      other.respond_to?(field) && send(field) == other.send(field)
    end
  end

  # Returns the page range with an abbreviated end page written out in full
  # ('123-29' -> '123-129').  Ranges containing a '.' and anything that does
  # not look like an abbreviated range are returned unchanged, as are pages
  # that are not a String (nil, or a number loaded from yaml).
  def pages_full
    return @pages unless @pages.respond_to? :split
    st_p, end_p = @pages.split('-')
    if !@pages.include?('.') && end_p && end_p.to_i < st_p.to_i # 123-29
      diff = st_p.size - end_p.size
      # an end page longer than the start page cannot be an abbreviation
      return @pages if diff < 0
      [st_p, st_p[0,diff] + end_p].join('-')
    else
      @pages
    end
  end
end
|
141
|
+
|
142
|
+
|
143
|
+
# Citation for an article that, judging by the class name, has not been
# submitted yet: it carries only a title besides the journal attributes
# mixed in from JournalLike and the attributes inherited from Citation.
class Citation::Article_to_be_submitted < Citation
  include JournalLike

  attr_accessor(:title)
end
|
147
|
+
|
148
|
+
# Citation type adding title, name, year and pages to the attributes
# inherited from Citation (presumably +name+ is the workshop's name).
class Citation::Workshop < Citation
  attr_accessor :title
  attr_accessor :name
  attr_accessor :year
  attr_accessor :pages
end
|
151
|
+
|
152
|
+
# Citation type adding title, publisher and year to the attributes
# inherited from Citation.
class Citation::Book < Citation
  attr_accessor :title
  attr_accessor :publisher
  attr_accessor :year
end
|
155
|
+
|
156
|
+
# Citation type for a web page.
class Citation::Webpage < Citation
  attr_accessor :title

  # month, year, day are all for the creation of the media itself
  attr_accessor :year
  attr_accessor :month
  attr_accessor :day

  attr_accessor :url

  # date last accessed (String: 'yyyy-mm-dd')
  attr_accessor :accessed
end
|
162
|
+
|
163
|
+
|
164
|
+
class Citation
  # One author: a last name plus initials.
  class Author

    ## INITIALS should be with NO spaces, all caps
    attr_reader :last, :initials

    def initialize(last, initials)
      @last = last
      @initials = initials
    end

    # e.g. "<Smith, JA>"
    def inspect
      "<#{@last}, #{initials}>"
    end

    # e.g. "Smith, JA" (the format Author.from_s reads back)
    def to_s
      "#{@last}, #{@initials}"
    end

    # Builds an Author from a "Last, INITIALS" string: everything before the
    # first ', ' is the last name, the remainder is taken as the initials.
    # TODO: make this smarter for initials
    def self.from_s(string)
      pieces = string.split(', ')
      last = pieces.shift
      new(last, pieces.join(', '))
    end

    # Two authors are equal when last name and initials both match.  Objects
    # that do not offer +last+ and +initials+ (nil, Strings, ...) are not
    # equal rather than an error.
    def ==(other)
      other.respond_to?(:last) && other.respond_to?(:initials) &&
        [last, initials] == [other.last, other.initials]
    end

  end
end
|
193
|
+
|
194
|
+
|
data/lib/format.rb
ADDED
@@ -0,0 +1,120 @@
|
|
1
|
+
|
2
|
+
# Mixin (and factory, see Format.new) for citation formats.  A format object
# wraps a media object and turns citations into strings; calls it does not
# define itself are forwarded to the media object or to the citation that is
# currently being formatted.
module Format
  # optional overrides for format symbol -> class name; by default the
  # capitalized symbol is used (:jtp -> 'Jtp')
  Symbol_to_class_string = { }

  # method names that are forwarded to the media object
  MediaForwarding = {
    :i => true,
    :b => true,
    :u => true,
    :header => true,
    :footer => true,
    :periodize => true,
    :par => true,
    :br => true,
  }

  # Factory: requires "format/<tp>" and returns a new instance of the
  # matching class inside Format, wrapping +media_obj+.
  def self.new(media_obj, tp=:jtp)
    require "format/#{tp}"
    klass_st = Symbol_to_class_string[tp] || tp.to_s.capitalize
    Format.const_get(klass_st).new(media_obj)
  end

  # Forwards names listed in MediaForwarding to the media object and any
  # other name the current citation responds to, to that citation.
  def method_missing(meth, *args, &block)
    if MediaForwarding.key?(meth)
      @media_obj.send(meth, *args, &block)
    elsif @cit and @cit.respond_to?(meth)
      @cit.send(meth, *args, &block)
    else
      raise NoMethodError, "method '#{meth}' called with args (#{args.join(',')})"
    end
  end

  # keeps respond_to? in step with method_missing
  def respond_to_missing?(meth, include_private = false)
    MediaForwarding.key?(meth) || (!@cit.nil? && @cit.respond_to?(meth)) || super
  end

  def initialize(media_obj)
    @media_obj = media_obj
    @cit = nil
  end

  # Appends +punc+ to every character of +initials+ ('JA' -> 'J.A.').
  def punctuate_initials(initials, punc='.')
    initials.split('').map { |letter| letter + punc }.join('')
  end

  # Formats every citation by calling the method named after its bibtype
  # (e.g. #article) and hands the finished strings to the media object's
  # +list+.
  def format(cits)
    as_strings = cits.map do |cit|
      @cit = cit
      finish(send(@cit.bibtype))
    end
    @media_obj.list(as_strings)
  end

  # if given an array, will finish it with compaction and periodizing
  # otherwise, won't touch it
  #
  # periodize is forwarded to the media object: it should take an array of
  # strings, each formatted in whatever method, and ensure that each string
  # ends in a period.
  def finish(arg)
    if arg.is_a? Array
      periodize(arg.compact).join(' ')
    else
      arg
    end
  end

  # Returns the author line of the current citation.
  # probably only the first argument would you ever change
  # if delim is nil, then et al. format is used (1 author, fine, 2 authors
  # connect with 'and', 3 authors = et al
  #
  # A String authors value is returned as is; nil authors give ''.  With
  # join_with_ands the last author is joined with and_word, which only
  # applies when there are at least two authors.
  def author_list(after_initials='.', separate_last_and_initials=' ', delim=", ", and_word="and", join_with_ands=false)
    auths = authors
    return auths if auths.is_a? String
    names = (auths || []).map do |auth|
      auth.last + separate_last_and_initials + punctuate_initials(auth.initials, after_initials)
    end
    if delim.nil?
      case names.size
      when 0
        ''
      when 1
        names.first
      when 2
        names.join(" #{and_word} ")
      else
        names.first + ' ' + i('et al.')
      end
    elsif join_with_ands && names.size > 1
      names[0...-1].join(delim) + " #{and_word} " + names[-1]
    else
      names.join(delim)
    end
  end

  #############################
  # universal format methods
  #############################

  # parenthesizes any 'true' object that has to_s method, otherwise ''
  def par(st)
    st ? "(#{st})" : ''
  end

end
|
119
|
+
|
120
|
+
|