bibsync 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +6 -0
- data/bibsync.gemspec +21 -0
- data/bin/bibsync +5 -0
- data/lib/bibsync/actions/check_versions.rb +35 -0
- data/lib/bibsync/actions/fetch_from_arxiv.rb +52 -0
- data/lib/bibsync/actions/find_my_citations.rb +46 -0
- data/lib/bibsync/actions/jabref_format.rb +22 -0
- data/lib/bibsync/actions/synchronize_files.rb +41 -0
- data/lib/bibsync/actions/synchronize_metadata.rb +172 -0
- data/lib/bibsync/actions/validate.rb +49 -0
- data/lib/bibsync/actions.rb +7 -0
- data/lib/bibsync/bibliography.rb +244 -0
- data/lib/bibsync/command.rb +107 -0
- data/lib/bibsync/log.rb +59 -0
- data/lib/bibsync/transformer.rb +69 -0
- data/lib/bibsync/utils.rb +45 -0
- data/lib/bibsync/version.rb +3 -0
- data/lib/bibsync.rb +10 -0
- metadata +82 -0
data/.gitignore
ADDED
data/bibsync.gemspec
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.dirname(__FILE__) + '/lib/bibsync/version'
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
# Gem specification for BibSync: package identity, shipped files and
# runtime dependencies.
Gem::Specification.new do |spec|
  # Summary and description intentionally carry the same text.
  tagline = 'BibSync is a tool to synchronize scientific papers and bibtex bibliography files'

  spec.name              = 'bibsync'
  spec.version           = BibSync::VERSION
  spec.date              = Date.today.to_s
  spec.authors           = ['Daniel Mendler']
  spec.email             = ['mail@daniel-mendler.de']
  spec.summary           = tagline
  spec.description       = tagline
  spec.homepage          = 'https://github.com/minad/bibsync'
  spec.rubyforge_project = spec.name

  # Everything tracked by git is packaged; executables are the base names
  # of the tracked files below bin/.
  tracked_files   = `git ls-files`.split("\n")
  tracked_scripts = `git ls-files -- bin/*`.split("\n")

  spec.files         = tracked_files
  spec.executables   = tracked_scripts.map { |path| File.basename(path) }
  spec.require_paths = ['lib']

  spec.add_runtime_dependency 'nokogiri'
end
|
data/bin/bibsync
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
module BibSync
  module Actions
    # Asks the arXiv API for the current identifier of every entry that has
    # an :arxiv field and reports (optionally downloads) entries whose
    # identifier differs from the stored one.
    class CheckVersions
      include Log
      include Utils

      # Number of identifiers sent to the arXiv API per request
      SliceSize = 20

      # options[:bib]    - bibliography to check (required)
      # options[:dir]    - download directory (required)
      # options[:update] - when truthy, download the reported papers
      def initialize(options)
        @bib = options[:bib]
        raise 'Bibliography must be set' unless @bib
        @dir = options[:dir]
        raise 'Directory must be set' unless @dir
        @update = options[:update]
      end

      # Queries arXiv in batches of SliceSize. A failing batch is logged and
      # the remaining batches are still processed.
      def run
        notice 'Check for newer version on arXiv'

        with_arxiv = @bib.select { |candidate| candidate[:arxiv] }
        with_arxiv.each_slice(SliceSize) do |batch|
          begin
            id_list = batch.map { |item| arxiv_id(item, :version => false, :prefix => true) }.join(',')
            xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{id_list}&max_results=#{SliceSize}")

            # NOTE(review): results are matched to the batch by position;
            # this presumes the API answers in request order - confirm.
            current_ids = xml.xpath('//entry/id').map(&:content)
            current_ids.each_with_index do |current, position|
              current.gsub!('http://arxiv.org/abs/', '')
              known = batch[position]
              next unless current != known[:arxiv]

              info("#{known[:arxiv]} replaced by http://arxiv.org/pdf/#{current}", :key => known)
              arxiv_download(@dir, current) if @update
            end
          rescue => ex
            error('arXiv query failed', :ex => ex)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module BibSync
  module Actions
    # Fetches the papers named by the :fetch option. arXiv abstract URLs are
    # resolved through the arXiv API and downloaded into the target
    # directory; every other URL is handed to the desktop browser.
    class FetchFromArXiv
      # Number of identifiers sent to the arXiv API per request
      SliceSize = 20

      # arXiv abstract page URL; capture 1 is the new-style identifier.
      # Anchored on the whole string (not per line) with literal dots.
      ArXivAbsUrl = %r{\Ahttps?://arxiv\.org/abs/(\d+\.\d+)\z}

      include Log
      include Utils

      # options[:fetch] - list of URLs to fetch (required)
      # options[:dir]   - download directory (required)
      def initialize(options)
        raise 'Fetch must be set' unless @fetch = options[:fetch]
        raise 'Directory must be set' unless @dir = options[:dir]
      end

      # Splits the requested URLs into arXiv identifiers and other URLs and
      # processes both groups.
      def run
        ids = []
        urls = []

        @fetch.each do |url|
          if url =~ ArXivAbsUrl
            ids << $1
          else
            urls << url
          end
        end

        open_in_browser(urls) unless urls.empty?
        download_from_arxiv(ids) unless ids.empty?
      end

      private

      # Opens every non-arXiv URL with xdg-open.
      def open_in_browser(urls)
        notice 'Starting browser for non-arXiv urls'
        urls.each do |url|
          info "Opening #{url}"
          `xdg-open #{Shellwords.escape url}`
        end
      end

      # Resolves the identifiers through the arXiv API in batches of
      # SliceSize and downloads each returned paper. A failing batch is
      # logged and the remaining batches are still processed.
      def download_from_arxiv(ids)
        notice 'Downloading from arXiv'
        ids.each_slice(SliceSize) do |batch|
          begin
            xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{batch.join(',')}&max_results=#{SliceSize}")
            xml.xpath('//entry/id').map(&:content).each do |id|
              id.gsub!('http://arxiv.org/abs/', '')
              info 'arXiv download', :key => id
              arxiv_download(@dir, id)
            end
          rescue => ex
            error('arXiv query failed', :ex => ex)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module BibSync
  module Actions
    # Scans a directory tree of TeX files for cite{...} commands and records
    # in the :citedbyme field of each bibliography entry the files that cite it.
    class FindMyCitations
      include Log
      include Utils

      # options[:bib]       - bibliography to annotate (required)
      # options[:citedbyme] - directory containing the TeX files (required)
      def initialize(options)
        raise 'Bibliography must be set' unless @bib = options[:bib]
        raise 'Tex directory must be set' unless @dir = options[:citedbyme]
        raise "#{@dir} is not a directory" unless File.directory?(@dir)
      end

      # Collects the citations, drops outdated :citedbyme fields, writes the
      # new ones and saves the bibliography. Cited keys that are missing
      # from the bibliography only produce a warning.
      def run
        notice 'Find citations in my TeX files'

        cites = {}
        Dir[File.join(@dir, '**/*.tex')].each do |file|
          # Resolved once per file, and only if the file cites anything.
          # The scanned path itself is left untouched: resolving an already
          # relative path again would be done against the working directory.
          relative = nil
          File.read(file).scan(/cite\{([^\}]+)\}/) do |captures|
            captures.first.split(/\s*,\s*/).each do |key|
              key = key.strip
              next if key.empty?
              relative ||= @bib.relative_path(file)
              debug("Cited in #{relative}", :key => key)
              (cites[key] ||= []) << relative
            end
          end
        end

        # Remove the field from entries that are no longer cited anywhere.
        # It has to be the same field that is written below.
        @bib.each do |entry|
          next if entry.comment?
          entry.delete(:citedbyme) unless cites.include?(entry.key)
        end

        cites.each do |key, files|
          files = files.sort.uniq.join(';')
          if @bib[key]
            @bib[key][:citedbyme] = files
          else
            warning("Cited in #{files} but not found in #{@bib.file}", :key => key)
          end
        end

        @bib.save
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module BibSync
  module Actions
    # Rewrites the bibliography file with JabRef unless the beginning of the
    # file already mentions JabRef.
    class JabrefFormat
      include Utils
      include Log

      # options[:bib] - bibliography to format (required)
      def initialize(options)
        raise 'Bibliography must be set' unless @bib = options[:bib]
      end

      # Saves the bibliography, then lets the jabref command line tool
      # re-export it to a temporary file which replaces the original if
      # jabref produced it.
      def run
        @bib.save

        # Only the first 80 bytes are inspected; a length-limited read of an
        # empty file yields nil, hence the to_s.
        header = File.read(@bib.file, 80).to_s
        return if header =~ /JabRef/

        notice 'Transforming file with JabRef'
        tmp_file = "#{@bib.file}.tmp.bib"
        `jabref --nogui --import #{Shellwords.escape @bib.file} --output #{Shellwords.escape tmp_file} 2>&1 >/dev/null`
        # File.exists? was removed in Ruby 3.2; File.exist? works everywhere
        File.rename(tmp_file, @bib.file) if File.exist?(tmp_file)
      end
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module BibSync
  module Actions
    # Makes sure every document file below the directory has a bibliography
    # entry whose key is the file name without extension, and points each
    # entry's file field at the file found.
    class SynchronizeFiles
      include Utils
      include Log

      # File extensions that are treated as documents
      FileTypes = %w(djvu pdf ps)

      # options[:bib] - bibliography to update (required)
      # options[:dir] - directory to scan (required)
      def initialize(options)
        @bib = options[:bib]
        raise 'Bibliography must be set' unless @bib
        @dir = options[:dir]
        raise 'Directory must be set' unless @dir
      end

      # Raises if two files map to the same key; otherwise creates missing
      # entries, attaches the files and saves the bibliography.
      def run
        notice 'Synchronize with files'

        documents = {}
        pattern = File.join(@dir, "**/*.{#{FileTypes.join(',')}}")
        Dir[pattern].sort.each do |path|
          basename = File.basename(path)
          key, _extension = split_filename(basename)
          raise "Duplicate file #{basename}" if documents[key]
          documents[key] = path
        end

        documents.each do |key, path|
          entry = @bib[key]
          if entry.nil?
            info('New file', :key => key)
            entry = Bibliography::Entry.new
            entry.key = key
            @bib << entry
          end

          # Entries without a type default to ARTICLE
          entry.type ||= :ARTICLE
          entry.file = path
        end

        @bib.save
      end
    end
  end
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
module BibSync
  module Actions
    # Completes bibliography entries with metadata from arXiv, doi.org and
    # the APS web site. Entries are processed when title, author or year is
    # missing, or always when the :resync option is set.
    class SynchronizeMetadata
      include Utils
      include Log

      # DOI prefix for which an APS abstract page is requested
      ApsDoi = /\A10\.1103\//

      # options[:bib]    - bibliography to update (required)
      # options[:resync] - refresh metadata even if it is already present
      def initialize(options)
        raise 'Bibliography must be set' unless @bib = options[:bib]
        @force = options[:resync]
      end

      # Walks over all non-comment entries and saves the bibliography after
      # each of them.
      def run
        notice 'Synchronize with arXiv and DOI'

        @bib.each do |entry|
          next if entry.comment?

          if @force || !(entry[:title] && entry[:author] && entry[:year])
            determine_arxiv_and_doi(entry)

            if entry[:arxiv]
              # The key still carries the version, i.e. a freshly downloaded file
              if entry.key == arxiv_id(entry, :prefix => false, :version => true)
                entry = rename_arxiv_file(entry)
                next unless entry
              end
              update_arxiv(entry)
            end

            update_doi(entry) if entry[:doi]
          end

          # Only APS DOIs have an APS abstract page. Forcing a resync must
          # not request one for entries without such a DOI.
          if entry[:doi] =~ ApsDoi && (@force || !entry[:abstract])
            update_aps_abstract(entry)
          end

          @bib.save
        end

        # The last iteration may have left through `next` after deleting an
        # entry; saving is a no-op when nothing is pending.
        @bib.save
      end

      private

      # Fetches the abstract from the APS page of the entry's DOI. Failures
      # are logged and leave the entry unchanged.
      def update_aps_abstract(entry)
        info("Downloading APS abstract", :key => entry)
        html = fetch_html("http://link.aps.org/doi/#{entry[:doi]}")
        abstract = html.css('.aps-abstractbox').map(&:content).first
        # Assigning nil would delete an abstract obtained elsewhere
        entry[:abstract] = abstract if abstract
      rescue => ex
        error('Abstract download failed', :key => entry, :ex => ex)
      end

      # Requests a BibTeX record from dx.doi.org and copies all its fields
      # into the entry. On failure the :doi field is removed and the error
      # is logged.
      def update_doi(entry)
        info('Downloading doi.org metadata', :key => entry)
        text = fetch("http://dx.doi.org/#{entry[:doi]}", 'Accept' => 'text/bibliography; style=bibtex')
        raise text if text == 'Unknown DOI'
        Bibliography::Entry.parse(text).each {|k, v| entry[k] = v }
      rescue => ex
        entry.delete(:doi)
        error('doi download failed', :key => entry, :ex => ex)
      end

      # Rename arxiv file if key contains version.
      #
      # Returns the entry to continue with (an already existing entry for
      # the version-less key, or the re-keyed entry itself), or nil when an
      # existing entry already has the same or a newer version; in that
      # case the new entry and its file are removed.
      def rename_arxiv_file(entry)
        file = entry.file

        key = arxiv_id(entry, :prefix => false, :version => false)

        if old_entry = @bib[key]
          # Existing entry found
          @bib.delete(entry)
          old_version = old_entry[:arxiv].to_s[/v(\d+)$/, 1]
          new_version = entry[:arxiv].to_s[/v(\d+)$/, 1]
          # Compare numerically: as strings "10" would sort before "9"
          if old_version && new_version && old_version.to_i >= new_version.to_i
            info('Not updating existing entry with older version', :key => old_entry)
            File.delete(file[:path]) if file
            return nil
          end

          old_entry[:arxiv] = entry[:arxiv]
          old_entry[:doi] = entry[:doi]
          entry = old_entry
          info('Updating existing entry', :key => entry)
        else
          # This is a new entry
          # NOTE(review): the bibliography keeps the entry indexed under its
          # previous key until the file is loaded again - confirm this is fine.
          entry.key = key
        end

        if file
          new_path = file[:path].sub(arxiv_id(entry, :prefix => false, :version => true), key)
          File.rename(file[:path], new_path)
          entry.file = new_path
        end

        @bib.save

        entry
      end

      # Loads the arXiv OAI record of the entry and copies its metadata into
      # the entry. On failure the :arxiv field is removed and the error is
      # logged.
      def update_arxiv(entry)
        info('Downloading arXiv metadata', :key => entry)
        xml = fetch_xml("http://export.arxiv.org/oai2?verb=GetRecord&identifier=oai:arXiv.org:#{arxiv_id(entry, :prefix => true, :version => false)}&metadataPrefix=arXiv")
        error = xml.xpath('//error').map(&:content).first
        raise error if error

        entry[:title] = xml.xpath('//arXiv/title').map(&:content).first
        entry[:abstract] = xml.xpath('//arXiv/abstract').map(&:content).first
        # The first listed category becomes the primary class
        entry[:primaryclass] = xml.xpath('//arXiv/categories').map(&:content).first.split(/\s+/).first
        entry[:author] = xml.xpath('//arXiv/authors/author').map do |author|
          "{#{author.xpath('keyname').map(&:content).first}}, {#{author.xpath('forenames').map(&:content).first}}"
        end.join(' and ')
        entry[:journal] = ArXivJournal
        entry[:eprint] = entry[:arxiv]
        entry[:archiveprefix] = 'arXiv'
        # Prefer the date of the last update over the creation date
        date = xml.xpath('//arXiv/updated').map(&:content).first || xml.xpath('//arXiv/created').map(&:content).first
        date = Date.parse(date)
        entry[:year] = date.year
        entry[:month] = Bibliography::RawValue.new(%w(jan feb mar apr may jun jul aug sep oct nov dec)[date.month - 1])
        doi = xml.xpath('//arXiv/doi').map(&:content).first
        entry[:doi] = doi if doi
        # A journal reference, if present, replaces the generic arXiv journal
        journal = xml.xpath('//arXiv/journal-ref').map(&:content).first
        entry[:journal] = journal if journal
        comments = xml.xpath('//arXiv/comments').map(&:content).first
        entry[:comments] = comments if comments
        entry[:url] = "http://arxiv.org/abs/#{entry[:arxiv]}"
      rescue => ex
        entry.delete(:arxiv)
        error('arXiv download failed', :key => entry, :ex => ex)
      end

      # Tries to find the arXiv and doi identifiers of an entry: from the
      # text of the first two pdf pages, from the file name, and finally by
      # asking arXiv for the paper that belongs to a known doi.
      def determine_arxiv_and_doi(entry)
        if file = entry.file
          if file[:type] == :PDF && !entry[:arxiv] && !entry[:doi]
            debug('Searching for arXiv or doi identifier in pdf file', :key => entry)
            text = `pdftotext -f 1 -l 2 #{Shellwords.escape file[:path]} - 2>/dev/null`
            entry[:arxiv] = $1 if text =~ /arXiv:\s*([\w\.\/\-]+)/
            entry[:doi] = $1 if text =~ /doi:\s*([\w\.\/\-]+)/i
          end

          # The dot between the two number groups is matched literally
          if !entry[:arxiv] && file[:name] =~ /^(\d+\.\d+v\d+)\.\w+$/
            debug('Interpreting file name as arXiv identifier', :key => entry)
            entry[:arxiv] = $1
          end

          if !entry[:doi] && file[:name] =~ /^(PhysRev.*?|RevModPhys.*?)\.\w+$/
            debug('Interpreting file name as doi identifier', :key => entry)
            entry[:doi] = "10.1103/#{$1}"
          end
        end

        if !entry[:arxiv] && entry[:doi]
          begin
            info('Fetch missing arXiv identifier', :key => entry)
            xml = fetch_xml("http://export.arxiv.org/api/query?search_query=doi:#{entry[:doi]}&max_results=1")
            # Accept the hit only if it reports exactly the requested doi
            if xml.xpath('//entry/doi').map(&:content).first == entry[:doi]
              id = xml.xpath('//entry/id').map(&:content).first
              if id =~ %r{\Ahttp://arxiv.org/abs/(.+)\Z}
                entry[:arxiv] = $1
              end
            end
          rescue => ex
            error('arXiv doi query failed', :ex => ex, :key => entry)
          end
        end

        unless entry[:arxiv] || entry[:doi]
          warning('No arXiv or doi identifier found', :key => entry)
        end
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module BibSync
  module Actions
    # Reports problems of bibliography entries as warnings: missing or
    # misnamed files, missing fields, and duplicated arXiv identifiers or
    # titles.
    class Validate
      include Utils
      include Log

      # Fields every entry is expected to have
      RequiredFields = [:title, :author, :year, :abstract]

      # options[:bib] - bibliography to validate (required)
      def initialize(options)
        @bib = options[:bib]
        raise 'Bibliography must be set' unless @bib
      end

      # Emits one warning per entry, listing all its problems.
      def run
        notice 'Check validity'
        seen_titles = {}
        seen_arxivs = {}

        @bib.each do |entry|
          next if entry.comment?

          problems = []
          file = entry.file

          problems << 'Missing file' unless file && File.file?(file[:path])

          RequiredFields.each do |field|
            problems << "Missing #{field}" unless entry[field]
          end

          # The file name without extension has to equal the entry key
          if file && split_filename(file[:name]).first != entry.key
            problems << 'Invalid file'
          end

          if entry[:arxiv]
            id = arxiv_id(entry, :version => false, :prefix => true)
            duplicate = register(seen_arxivs, id, entry.key, 'ArXiv')
            problems << duplicate if duplicate
          end

          if entry[:title]
            duplicate = register(seen_titles, entry[:title], entry.key, 'Title')
            problems << duplicate if duplicate
          end

          warning(problems.join(', '), :key => entry) unless problems.empty?
        end
      end

      private

      # Remembers which key first used +value+. Returns a message naming
      # that key if the value was registered before, nil otherwise.
      def register(seen, value, key, label)
        return "#{label} duplicate of '#{seen[value]}'" if seen.include?(value)
        seen[value] = key
        nil
      end
    end
  end
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
require 'bibsync/actions/check_versions'
|
2
|
+
require 'bibsync/actions/synchronize_files'
|
3
|
+
require 'bibsync/actions/synchronize_metadata'
|
4
|
+
require 'bibsync/actions/validate'
|
5
|
+
require 'bibsync/actions/jabref_format'
|
6
|
+
require 'bibsync/actions/fetch_from_arxiv'
|
7
|
+
require 'bibsync/actions/find_my_citations'
|
@@ -0,0 +1,244 @@
|
|
1
|
+
module BibSync
  # A BibTeX file held in memory: a collection of entries indexed by key,
  # with change tracking so that the file is only rewritten when needed.
  class Bibliography
    include Enumerable

    # Leading whitespace or one %-comment line. The comment may be empty
    # and may end at the end of the input instead of a newline.
    Skippable = /\A(\s+|%[^\n]*(\n|\z))/

    attr_reader :file

    # file - path of the bibliography; it is parsed if it exists.
    def initialize(file = nil)
      @entries, @file = {}, file
      # File.exists? was removed in Ruby 3.2
      parse(File.read(@file)) if @file && File.exist?(@file)
      # Loading adds entries, which marks the bibliography dirty
      @dirty = false
      @save_hooks = []
    end

    # Registers an object whose #call receives the bibliography right
    # before it is written.
    def save_hook(hook)
      @save_hooks << hook
    end

    # True if there are changes that were not saved yet.
    def dirty?
      @dirty
    end

    # Marks the bibliography as changed.
    def dirty!
      @dirty = true
    end

    # Returns the entry stored under key, or nil.
    def [](key)
      @entries[key]
    end

    # Removes the entry stored under the entry's key and detaches it.
    def delete(entry)
      if @entries.include?(entry.key)
        @entries.delete(entry.key)
        entry.bibliography = nil
        dirty!
      end
    end

    # Returns file relative to the directory of the bibliography file.
    # Both paths are resolved with realpath. Raises if the bibliography
    # has no file name.
    def relative_path(file)
      raise 'No filename given' unless @file
      bibpath = Pathname.new(@file).realpath.parent
      Pathname.new(file).realpath.relative_path_from(bibpath).to_s
    end

    # Yields every entry.
    def each(&block)
      @entries.each_value(&block)
    end

    # Writes the bibliography if it has unsaved changes. Passing file
    # changes the target and forces a write. Returns true if the file was
    # written, false otherwise. Raises if no file name is known.
    def save(file = nil)
      if file
        @file = file
        @dirty = true
      end

      raise 'No filename given' unless @file
      if @dirty
        @save_hooks.each {|hook| hook.call(self) }
        # Write next to the target and rename afterwards, so the target is
        # only replaced by a completely written file
        File.open("#{@file}.tmp", 'w') {|f| f.write(self) }
        File.rename("#{@file}.tmp", @file)
        @dirty = false
        true
      else
        false
      end
    end

    # Adds entry under its key, replacing an entry with the same key.
    def <<(entry)
      entry.bibliography = self
      @entries[entry.key] = entry
      dirty!
    end

    # Parses text and adds all entries found. Entries without a key
    # (comments) get the current number of entries as key.
    def parse(text)
      until text.empty?
        case text
        when Skippable
          text = $'
        else
          entry = Entry.new
          text = entry.parse(text)
          entry.key ||= @entries.size
          self << entry
        end
      end
    end

    # Renders the file content: a header with the current time and the
    # encoding, followed by all entries.
    def to_s
      "% #{DateTime.now}\n% Encoding: UTF8\n\n" <<
        @entries.values.join("\n") << "\n"
    end

    # Field value that is written without surrounding braces.
    class RawValue < String; end

    # A single BibTeX entry: type, key and a hash of fields whose names are
    # normalized to lower-case symbols.
    class Entry
      include Enumerable

      attr_accessor :key, :type, :bibliography

      # Parses a single entry from text and returns it.
      def self.parse(text)
        entry = Entry.new
        entry.parse(text)
        entry
      end

      def initialize
        @fields = {}
      end

      # Stores file in the :file field, relative to the bibliography, using
      # the extension as type. Raises if the entry is not part of a
      # bibliography or the file has no extension.
      def file=(file)
        raise 'No bibliography set' unless bibliography
        extension = file[/\.(\w+)$/, 1]
        raise "No file extension: #{file}" unless extension
        self[:file] = ":#{bibliography.relative_path(file)}:#{extension.upcase}" # JabRef file format "description:path:type"
        file
      end

      # Returns a hash with :name, :type and :path of the attached file, or
      # nil if the entry has no :file field.
      def file
        if self[:file]
          raise 'No bibliography set' unless bibliography
          _description, file, type = self[:file].split(':', 3)
          path = (Pathname.new(bibliography.file).realpath.parent + file).to_s
          { :name => File.basename(path), :type => type.upcase.to_sym, :path => path }
        end
      end

      # Returns the value of the field, or nil.
      def [](key)
        @fields[convert_key(key)]
      end

      # Sets a field to the stripped string form of value, keeping the
      # RawValue class. nil or false deletes the field. The bibliography is
      # only marked dirty if content or class of the value changed.
      def []=(key, value)
        if value then
          key = convert_key(key)
          value = RawValue === value ? RawValue.new(value.to_s.strip) : value.to_s.strip
          if @fields[key] != value || @fields[key].class != value.class
            @fields[key] = value
            dirty!
          end
        else
          delete(key)
        end
      end

      # Removes a field if present.
      def delete(key)
        key = convert_key(key)
        if @fields.include?(key)
          @fields.delete(key)
          dirty!
        end
      end

      # Yields name and value of every field.
      def each(&block)
        @fields.each(&block)
      end

      # True for @comment entries.
      def comment?
        type.to_s.downcase == 'comment'
      end

      # Marks the owning bibliography, if any, as changed.
      def dirty!
        bibliography.dirty! if bibliography
      end

      # Renders the entry as BibTeX.
      def to_s
        s = "@#{type}{"
        if comment?
          s << self[:comment]
        else
          s << "#{key},\n" << to_a.map {|k,v| RawValue === v ? " #{k} = #{v}" : " #{k} = {#{v}}" }.join(",\n") << "\n"
        end
        s << "}\n"
      end

      # Parses one entry from the beginning of text and returns the rest of
      # the text. Raises on unexpected input.
      def parse(text)
        raise 'Unexpected token' if text !~ /\A\s*@(\w+)\s*\{/
        self.type = $1
        text = $'

        if comment?
          text, self[:comment] = parse_field(text)
        else
          raise 'Expected entry key' if text !~ /([^,]+),\s*/
          self.key = $1.strip
          text = $'

          until text.empty?
            case text
            when Skippable
              text = $'
            when /\A\s*(\w+)\s*=\s*/
              text, key = $', $1
              if text =~ /\A\{/
                text, self[key] = parse_field(text)
              else
                # Values that are not wrapped in braces are kept as they are
                text, value = parse_field(text)
                self[key] = RawValue.new(value)
              end
            else
              break
            end
          end
        end

        raise 'Expected closing }' unless text =~ /\A\s*\}/
        $'
      end

      private

      # Reads one field value from the beginning of text. Nested braces are
      # kept, the outermost pair is dropped. Reading stops at a comma or a
      # closing brace outside of all braces; the comma is consumed, the
      # brace is not. Returns the remaining text and the value.
      def parse_field(text)
        value = ''
        count = 0
        until text.empty?
          case text
          when /\A\{/
            text = $'
            value << $& if count > 0
            count += 1
          when /\A\}/
            break if count == 0
            count -= 1
            text = $'
            value << $& if count > 0
          when /\A,/
            text = $'
            break if count == 0
            value << $&
          when /\A[^\}\{,]+/
            text = $'
            value << $&
          else
            break
          end
        end

        raise 'Expected closing }' if count != 0

        return text, value
      end

      # Field names are case-insensitive symbols.
      def convert_key(key)
        key.to_s.downcase.to_sym
      end
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'bibsync'
|
2
|
+
require 'optparse'
|
3
|
+
|
4
|
+
module BibSync
  # Command line front end: parses the options and runs the selected
  # actions in a fixed order.
  class Command
    # Provides #error, which #process uses to report surplus arguments
    include Log

    # args - command line arguments; options are removed from it by #run
    def initialize(args)
      @args = args
      @options = {}
    end

    # Parses the arguments, runs the actions and terminates the process:
    # status 0 on success, status 1 after printing the message of any
    # exception. With --trace the exception is re-raised instead.
    def run
      @opts = OptionParser.new(&method(:set_opts))
      @opts.parse!(@args)
      process
      exit 0
    rescue Exception => ex
      # Top-level handler: everything is reported, but SystemExit (raised
      # by exit) has to pass through unchanged
      raise ex if Log.trace? || SystemExit === ex
      $stderr.print "#{ex.class}: " if ex.class != RuntimeError
      $stderr.puts ex.message
      $stderr.puts ' Use --trace for backtrace.'
      exit 1
    end

    private

    # Declares all command line options on the option parser.
    def set_opts(opts)
      opts.banner = "Usage: #{$0} [options]"

      opts.on('-b', '--bib bibfile.bib', 'Set bibliography') do |bib|
        @options[:bib] = bib
      end

      opts.on('-d', '--directory directory', 'Set directory') do |dir|
        @options[:dir] = dir
      end

      opts.on('-v', '--check-versions', 'Check for updated arXiv papers') do
        @options[:check_versions] = true
      end

      opts.on('-u', '--update', 'Update arXiv papers') do
        @options[:update] = true
      end

      opts.on('-s', '--sync', 'Synchronize missing metadata') do
        @options[:sync] = true
      end

      opts.on('-r', '--resync', 'Force synchronization with arXiv and DOI') do
        @options[:resync] = true
      end

      opts.on('-m', '--citedbyme directory', 'Find my citations in my TeX files') do |dir|
        @options[:citedbyme] = dir
      end

      opts.on('-j', '--jabref', 'Format bibliography file with JabRef') do
        @options[:jabref] = true
      end

      # May be given several times; the URLs are collected
      opts.on('-f', '--fetch url', 'Fetch arXiv paper as PDF file') do |url|
        (@options[:fetch] ||= []) << url
      end

      opts.on('-V', '--verbose', 'Verbose output') do
        Log.verbose!
      end

      opts.on('--trace', 'Show a full traceback on error') do
        Log.trace!
      end

      opts.on('-h', '--help', 'Display this help') do
        puts opts
        exit
      end

      opts.on('--version', 'Display version information') do
        puts "BibSync Version #{BibSync::VERSION}"
        exit
      end
    end

    # Loads the bibliography and runs the actions selected by the options.
    # All actions are constructed before the first one runs, so missing
    # options are reported before any work is done.
    def process
      if @args.size != 0
        error 'Too many arguments'
        puts @opts
        exit 1
      end

      if @options[:bib]
        @options[:bib] = Bibliography.new(@options[:bib])
        @options[:bib].save_hook(Transformer.new)
      end

      actions = []
      actions << :FetchFromArXiv if @options[:fetch]
      actions << :CheckVersions if @options[:check_versions] || @options[:update]
      actions << :SynchronizeFiles << :SynchronizeMetadata if @options[:sync] || @options[:resync]
      actions << :FindMyCitations if @options[:citedbyme]
      actions << :Validate
      actions << :JabrefFormat if @options[:jabref]

      actions.map {|a| Actions.const_get(a).new(@options) }.each {|a| a.run }
    end
  end
end
|
data/lib/bibsync/log.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
module BibSync
  # Console logging mixin. The verbose and trace switches are kept on the
  # module itself and are therefore shared by every class that includes it.
  module Log
    # ANSI escape sequences used to colorize messages
    Reset = "\e[0m"
    Red = "\e[31m"
    Yellow = "\e[33m"
    Blue = "\e[36m"

    class << self
      # Truthy once verbose! was called.
      def verbose?
        @verbose
      end

      # Enables debug messages.
      def verbose!
        @verbose = true
      end

      # Truthy once trace! was called.
      def trace?
        @trace
      end

      # Enables printing of exception backtraces.
      def trace!
        @trace = true
      end
    end

    # Prints the message like #info, but only in verbose mode.
    def debug(message, opts = {})
      return unless Log.verbose?
      info(message, opts)
    end

    # Prints the message without color.
    def info(message, opts = {})
      log(message, opts)
    end

    # Prints the message using the Blue sequence.
    def notice(message, opts = {})
      log(message, opts.merge(:color => Blue))
    end

    # Prints the message using the Yellow sequence.
    def warning(message, opts = {})
      log(message, opts.merge(:color => Yellow))
    end

    # Prints the message using the Red sequence.
    def error(message, opts = {})
      log(message, opts.merge(:color => Red))
    end

    # Writes one line to standard output.
    #
    # opts[:ex]    - exception; its message is appended and, in trace mode,
    #                its backtrace is printed afterwards
    # opts[:color] - escape sequence wrapped around the message
    # opts[:key]   - prefix of the line; objects responding to #key are
    #                replaced by their key
    def log(message, opts = {})
      failure = opts[:ex]
      tint = opts[:color]
      subject = opts[:key]

      line = failure ? "#{message} - #{failure.message}" : message
      line = "#{tint}#{line}#{Reset}" if tint

      if subject
        subject = subject.key if subject.respond_to?(:key)
        line = "#{subject} : #{line}"
      end

      puts(line)
      puts(failure.backtrace.join("\n")) if Log.trace? && failure
    end
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module BibSync
  # Normalizes the fields of all bibliography entries. An instance can be
  # registered as save hook of a bibliography.
  class Transformer
    include Utils

    # Normalizes every non-comment entry of bib in place.
    def call(bib)
      bib.each do |entry|
        next if entry.comment?

        clean_author(entry)
        complete_from_aps_doi(entry)
        unify_publisher(entry)
        lowercase_month(entry)
        normalize_journal(entry) if entry[:journal]
      end
    end

    private

    # Drops braces around single words and turns '#' into ' and '.
    def clean_author(entry)
      author = entry[:author]
      return unless author
      entry[:author] = author.gsub(/\{(\w+)\}/, '\\1').gsub(/#/, ' and ')
    end

    # For PhysRev/RevModPhys DOIs, fills in missing publisher and pages;
    # the pages are the digits after the last dot of the DOI.
    def complete_from_aps_doi(entry)
      return unless entry[:doi] && entry[:doi] =~ /(PhysRev|RevModPhys).*?\.(\d+)$/
      trailing_digits = Regexp.last_match(2)
      entry[:publisher] ||= 'American Physical Society'
      entry[:pages] ||= trailing_digits
    end

    # Uses one spelling for the American Physical Society.
    def unify_publisher(entry)
      return unless entry[:publisher] && entry[:publisher] =~ /American Physical Society/i
      entry[:publisher] = 'American Physical Society'
    end

    # Stores the month in lower case as a raw value.
    def lowercase_month(entry)
      return unless entry[:month]
      entry[:month] = Bibliography::RawValue.new(entry[:month].downcase)
    end

    # Expands journal references of EPL and the Physical Review journals
    # into journal, year, pages and volume, then derives the short journal
    # name.
    def normalize_journal(entry)
      if entry[:journal] =~ /EPL/
        extract_reference(entry, / (\d{5,10})( |\Z)/, / (\d{2,4})( |\Z)/)
        entry[:journal] = 'Europhysics Letters'
      end

      if entry[:journal] =~ /(Phys\.|Physical) (Rev\.|Review) Lett[^ ]+ /
        extract_reference(entry, / (\d{5,10})( |,|\Z)/, / (\d{2,4})( |,|\Z)/)
        entry[:journal] = 'Physical Review Letters'
      end

      if entry[:journal] =~ /(Phys\.|Physical) (Rev\.|Review) (\w) /
        section = Regexp.last_match(3)
        extract_reference(entry, / (\d{5,10})( |,|\Z)/, / (\d{2,4})( |,|\Z)/)
        entry[:journal] = "Physical Review #{section}"
      end

      entry[:shortjournal] =
        case entry[:journal]
        when /\APhysical Review (\w)\Z/i then "PR#{$1.upcase}"
        when /\APhysical Review Letters\Z/i then 'PRL'
        when /\AReviews of Modern Physics\Z/i then 'RMP'
        when /\ANew Journal of Physics\Z/i then 'NJP'
        when /\A#{ArXivJournal}\Z/i then 'arXiv'
        when /\AEurophysics Letters\Z/i then 'EPL'
        else entry[:journal]
        end
    end

    # Copies a year in parentheses and the numbers matched by the given
    # patterns from the journal text into year, pages and volume.
    def extract_reference(entry, pages_pattern, volume_pattern)
      entry[:year] = $1 if entry[:journal] =~ /\((\d{4})\)/
      entry[:pages] = $1 if entry[:journal] =~ pages_pattern
      entry[:volume] = $1 if entry[:journal] =~ volume_pattern
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module BibSync
  # Helpers shared by the actions: file name handling, downloads through
  # curl and arXiv identifier manipulation.
  module Utils
    # Journal name used for papers that are only known from arXiv
    ArXivJournal = 'ArXiv e-prints'

    # Splits a file name at its last dot.
    #
    # Returns the name without extension and the upper-cased extension.
    # Names without an extension are returned unchanged with nil as
    # extension.
    def split_filename(file)
      match = /^(.*?)\.(\w+)$/.match(file)
      return file, nil unless match
      return match[1], match[2].upcase
    end

    # Downloads url with curl, following redirects, and returns the
    # response body. headers are sent as request headers. Raises with
    # curl's output as message if curl fails.
    def fetch(url, headers = {})
      # open(url, headers) {|f| f.read }
      headers = headers.map {|k,v| '-H ' + Shellwords.escape("#{k}: #{v}") }.join(' ')
      # --stderr - sends curl's error text to the captured output
      result = `curl --stderr - -S -s -L #{headers} #{Shellwords.escape url}`
      raise result.chomp unless $?.success?
      result
    end

    # Downloads the pdf of the arXiv paper id into dir. The file is named
    # after the identifier including its version, without prefix. Raises
    # with curl's output as message if curl fails.
    def arxiv_download(dir, id)
      url = "http://arxiv.org/pdf/#{id}"
      file = File.join(dir, "#{arxiv_id(id, :version => true, :prefix => false)}.pdf")
      result = `curl --stderr - -S -s -L -o #{Shellwords.escape file} #{Shellwords.escape url}`
      raise result.chomp unless $?.success?
    end

    # Fetches url and parses the response as XML. Namespaces are removed
    # so that elements can be addressed by their plain names.
    def fetch_xml(url, headers = {})
      xml = Nokogiri::XML(fetch(url, headers))
      xml.remove_namespaces!
      xml
    end

    # Fetches url and parses the response as HTML.
    def fetch_html(url, headers = {})
      Nokogiri::HTML(fetch(url, headers))
    end

    # Returns an arXiv identifier in the requested form.
    #
    # arxiv          - identifier string, or an entry whose :arxiv field is used
    # opts[:prefix]  - keep everything up to the last slash
    # opts[:version] - keep the trailing version (v followed by digits)
    #
    # Both options have to be given, otherwise a RuntimeError is raised.
    # Returns nil if there is no identifier.
    def arxiv_id(arxiv, opts = {})
      unless opts.include?(:prefix) && opts.include?(:version)
        raise 'arxiv_id requires the :prefix and :version options'
      end
      arxiv = arxiv[:arxiv] if Bibliography::Entry === arxiv
      if arxiv
        arxiv = arxiv.sub(/^.*\//, '') unless opts[:prefix]
        arxiv = arxiv.sub(/v\d+$/, '') unless opts[:version]
        arxiv
      end
    end
  end
end
|
data/lib/bibsync.rb
ADDED
metadata
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bibsync
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Daniel Mendler
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-04-04 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: BibSync is a tool to synchronize scientific papers and bibtex bibliography
|
31
|
+
files
|
32
|
+
email:
|
33
|
+
- mail@daniel-mendler.de
|
34
|
+
executables:
|
35
|
+
- bibsync
|
36
|
+
extensions: []
|
37
|
+
extra_rdoc_files: []
|
38
|
+
files:
|
39
|
+
- .gitignore
|
40
|
+
- bibsync.gemspec
|
41
|
+
- bin/bibsync
|
42
|
+
- lib/bibsync.rb
|
43
|
+
- lib/bibsync/actions.rb
|
44
|
+
- lib/bibsync/actions/check_versions.rb
|
45
|
+
- lib/bibsync/actions/fetch_from_arxiv.rb
|
46
|
+
- lib/bibsync/actions/find_my_citations.rb
|
47
|
+
- lib/bibsync/actions/jabref_format.rb
|
48
|
+
- lib/bibsync/actions/synchronize_files.rb
|
49
|
+
- lib/bibsync/actions/synchronize_metadata.rb
|
50
|
+
- lib/bibsync/actions/validate.rb
|
51
|
+
- lib/bibsync/bibliography.rb
|
52
|
+
- lib/bibsync/command.rb
|
53
|
+
- lib/bibsync/log.rb
|
54
|
+
- lib/bibsync/transformer.rb
|
55
|
+
- lib/bibsync/utils.rb
|
56
|
+
- lib/bibsync/version.rb
|
57
|
+
homepage: https://github.com/minad/bibsync
|
58
|
+
licenses: []
|
59
|
+
post_install_message:
|
60
|
+
rdoc_options: []
|
61
|
+
require_paths:
|
62
|
+
- lib
|
63
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
64
|
+
none: false
|
65
|
+
requirements:
|
66
|
+
- - ! '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
|
+
none: false
|
71
|
+
requirements:
|
72
|
+
- - ! '>='
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
75
|
+
requirements: []
|
76
|
+
rubyforge_project: bibsync
|
77
|
+
rubygems_version: 1.8.24
|
78
|
+
signing_key:
|
79
|
+
specification_version: 3
|
80
|
+
summary: BibSync is a tool to synchronize scientific papers and bibtex bibliography
|
81
|
+
files
|
82
|
+
test_files: []
|