bibsync 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +6 -0
- data/bibsync.gemspec +21 -0
- data/bin/bibsync +5 -0
- data/lib/bibsync/actions/check_versions.rb +35 -0
- data/lib/bibsync/actions/fetch_from_arxiv.rb +52 -0
- data/lib/bibsync/actions/find_my_citations.rb +46 -0
- data/lib/bibsync/actions/jabref_format.rb +22 -0
- data/lib/bibsync/actions/synchronize_files.rb +41 -0
- data/lib/bibsync/actions/synchronize_metadata.rb +172 -0
- data/lib/bibsync/actions/validate.rb +49 -0
- data/lib/bibsync/actions.rb +7 -0
- data/lib/bibsync/bibliography.rb +244 -0
- data/lib/bibsync/command.rb +107 -0
- data/lib/bibsync/log.rb +59 -0
- data/lib/bibsync/transformer.rb +69 -0
- data/lib/bibsync/utils.rb +45 -0
- data/lib/bibsync/version.rb +3 -0
- data/lib/bibsync.rb +10 -0
- metadata +82 -0
data/.gitignore
ADDED
data/bibsync.gemspec
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
require File.dirname(__FILE__) + '/lib/bibsync/version'
|
3
|
+
require 'date'
|
4
|
+
|
5
|
+
# Gem specification for BibSync: package metadata, the list of packaged
# files (taken from git) and the runtime dependencies.
Gem::Specification.new do |spec|
  # Package identity
  spec.name    = 'bibsync'
  spec.version = BibSync::VERSION
  spec.date    = Date.today.to_s

  # Author and project information
  spec.authors     = ['Daniel Mendler']
  spec.email       = ['mail@daniel-mendler.de']
  spec.summary     = 'BibSync is a tool to synchronize scientific papers and bibtex bibliography files'
  spec.description = 'BibSync is a tool to synchronize scientific papers and bibtex bibliography files'
  spec.homepage    = 'https://github.com/minad/bibsync'
  spec.rubyforge_project = spec.name

  # Everything tracked by git is packaged; files below bin/ become executables.
  tracked_files = `git ls-files`.split("\n")
  bin_files     = `git ls-files -- bin/*`.split("\n")

  spec.files         = tracked_files
  spec.executables   = bin_files.map { |path| File.basename(path) }
  spec.require_paths = ['lib']

  spec.add_runtime_dependency('nokogiri')
end
|
data/bin/bibsync
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
module BibSync
  module Actions
    # Asks the arXiv API for the current version of every bibliography entry
    # that has an :arxiv field, reports entries whose stored identifier
    # differs and optionally downloads the reported version.
    class CheckVersions
      include Log
      include Utils

      # Number of identifiers sent to the arXiv API per request.
      SliceSize = 20

      # options[:bib]    - bibliography to check (required)
      # options[:dir]    - directory handed to arxiv_download (required)
      # options[:update] - when truthy, download papers whose version differs
      #
      # Raises RuntimeError if :bib or :dir is missing.
      def initialize(options)
        raise 'Bibliography must be set' unless @bib = options[:bib]
        raise 'Directory must be set' unless @dir = options[:dir]
        @update = options[:update]
      end

      # Queries arXiv slice by slice. A failing request is logged and does
      # not abort the remaining slices.
      def run
        notice 'Check for newer version on arXiv'
        @bib.select {|e| e[:arxiv] }.each_slice(SliceSize) do |entries|
          begin
            check_slice(entries)
          rescue => ex
            error('arXiv query failed', :ex => ex)
          end
        end
      end

      private

      # Checks one slice of entries against the arXiv API.
      #
      # Results are matched to entries by their version-less identifier
      # instead of by position, so a missing, reordered or unexpected result
      # cannot be attributed to the wrong entry (or to no entry at all).
      def check_slice(entries)
        ids = entries.map {|e| arxiv_id(e, :version => false, :prefix => true) }
        by_id = Hash[ids.zip(entries)]

        xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{ids.join(',')}&max_results=#{SliceSize}")
        xml.xpath('//entry/id').map(&:content).each do |id|
          id = id.gsub('http://arxiv.org/abs/', '')
          entry = by_id[id.sub(/v\d+$/, '')]
          next unless entry
          next if id == entry[:arxiv]

          info("#{entry[:arxiv]} replaced by http://arxiv.org/pdf/#{id}", :key => entry)
          arxiv_download(@dir, id) if @update
        end
      end
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
module BibSync
  module Actions
    # Downloads the papers behind the given URLs: arXiv abstract URLs are
    # resolved through the arXiv API and downloaded, all other URLs are
    # opened with xdg-open.
    class FetchFromArXiv
      # Number of identifiers sent to the arXiv API per request.
      SliceSize = 20

      # Matches an arXiv abstract URL (http or https, optional version
      # suffix) and captures the identifier. Anchored to the whole string.
      ArXivUrl = %r{\Ahttps?://arxiv\.org/abs/(\d+\.\d+(?:v\d+)?)\z}

      include Log
      include Utils

      # options[:fetch] - list of URLs to fetch (required)
      # options[:dir]   - directory handed to arxiv_download (required)
      #
      # Raises RuntimeError if :fetch or :dir is missing.
      def initialize(options)
        raise 'Fetch must be set' unless @fetch = options[:fetch]
        raise 'Directory must be set' unless @dir = options[:dir]
      end

      # Opens non-arXiv URLs in the browser, then downloads the arXiv papers.
      def run
        arxiv_urls, other_urls = @fetch.partition {|url| url =~ ArXivUrl }
        ids = arxiv_urls.map {|url| url[ArXivUrl, 1] }

        open_in_browser(other_urls) unless other_urls.empty?
        download(ids) unless ids.empty?
      end

      private

      # Hands every URL to xdg-open.
      def open_in_browser(urls)
        notice 'Starting browser for non-arXiv urls'
        urls.each do |url|
          info "Opening #{url}"
          `xdg-open #{Shellwords.escape url}`
        end
      end

      # Resolves the identifiers through the arXiv API in slices and downloads
      # every returned paper. A failing slice is logged and the remaining
      # slices are still processed.
      def download(ids)
        notice 'Downloading from arXiv'
        ids.each_slice(SliceSize) do |slice|
          begin
            xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{slice.join(',')}&max_results=#{SliceSize}")
            xml.xpath('//entry/id').map(&:content).each do |id|
              id = id.gsub('http://arxiv.org/abs/', '')
              info 'arXiv download', :key => id
              arxiv_download(@dir, id)
            end
          rescue => ex
            error('arXiv query failed', :ex => ex)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module BibSync
  module Actions
    # Scans the TeX files below a directory for cite{...} commands and
    # records, per bibliography entry, the files that cite it in the
    # :citedbyme field.
    class FindMyCitations
      include Log
      include Utils

      # options[:bib]       - bibliography to update (required)
      # options[:citedbyme] - directory containing the TeX files (required)
      #
      # Raises RuntimeError if an option is missing or the directory does
      # not exist.
      def initialize(options)
        raise 'Bibliography must be set' unless @bib = options[:bib]
        raise 'Tex directory must be set' unless @dir = options[:citedbyme]
        raise "#{@dir} is not a directory" unless File.directory?(@dir)
      end

      # Collects the citations, updates the entries and saves the bibliography.
      def run
        notice 'Find citations in my TeX files'

        cites = collect_citations

        @bib.each do |entry|
          next if entry.comment?
          next if cites.include?(entry.key)
          # The entry is no longer cited: drop the field written below so no
          # stale file list survives.
          entry.delete(:citedbyme)
          # NOTE(review): the original code removed :cites here; kept in case
          # it is a legacy field name - confirm whether it is still needed.
          entry.delete(:cites)
        end

        cites.each do |key, files|
          files = files.sort.uniq.join(';')
          if @bib[key]
            @bib[key][:citedbyme] = files
          else
            warning("Cited in #{files} but not found in #{@bib.file}", :key => key)
          end
        end

        @bib.save
      end

      private

      # Returns a hash mapping each citation key to the list of TeX files
      # (paths relative to the bibliography) in which it is cited.
      def collect_citations
        cites = {}
        Dir[File.join(@dir, '**/*.tex')].each do |tex_file|
          # Resolved once per file (and only if the file cites anything).
          # Re-resolving the already relative path for every key would
          # interpret it relative to the current working directory.
          relative = nil
          File.read(tex_file).scan(/cite\{([^\}]+)\}/) do |match|
            relative ||= @bib.relative_path(tex_file)
            match.first.split(/\s*,\s*/).each do |key|
              key = key.strip
              debug("Cited in #{relative}", :key => key)
              (cites[key] ||= []) << relative
            end
          end
        end
        cites
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module BibSync
  module Actions
    # Saves the bibliography and, unless the file already carries a JabRef
    # marker near its beginning, lets the jabref command line tool rewrite it.
    class JabrefFormat
      include Utils
      include Log

      # options[:bib] - bibliography to format (required)
      #
      # Raises RuntimeError if :bib is missing.
      def initialize(options)
        raise 'Bibliography must be set' unless @bib = options[:bib]
      end

      def run
        @bib.save
        # Only the first 80 bytes are inspected for the JabRef marker.
        if File.read(@bib.file, 80) !~ /JabRef/
          notice 'Transforming file with JabRef'
          tmp_file = "#{@bib.file}.tmp.bib"
          `jabref --nogui --import #{Shellwords.escape @bib.file} --output #{Shellwords.escape tmp_file} 2>&1 >/dev/null`
          # Replace the bibliography only if jabref produced an output file.
          # File.exist? is used because File.exists? was removed in Ruby 3.2.
          File.rename(tmp_file, @bib.file) if File.exist?(tmp_file)
        end
      end
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module BibSync
  module Actions
    # Makes sure every document file found below a directory has a
    # bibliography entry whose key is the file name without extension.
    class SynchronizeFiles
      include Utils
      include Log

      # File extensions that are treated as documents.
      FileTypes = %w(djvu pdf ps)

      # options[:bib] - bibliography to update (required)
      # options[:dir] - directory that is searched recursively (required)
      #
      # Raises RuntimeError if :bib or :dir is missing.
      def initialize(options)
        raise 'Bibliography must be set' unless @bib = options[:bib]
        raise 'Directory must be set' unless @dir = options[:dir]
      end

      # Creates missing entries, attaches the file to each entry and saves.
      def run
        notice 'Synchronize with files'

        document_files.each do |key, path|
          entry = @bib[key]
          unless entry
            info('New file', :key => key)
            entry = Bibliography::Entry.new
            entry.key = key
            @bib << entry
          end

          entry.type ||= :ARTICLE
          entry.file = path
        end

        @bib.save
      end

      private

      # Returns a hash mapping entry key to file path for all document files
      # below the directory, in sorted path order. Raises if two files map
      # to the same key.
      def document_files
        pattern = File.join(@dir, "**/*.{#{FileTypes.join(',')}}")
        Dir[pattern].sort.each_with_object({}) do |path, found|
          basename = File.basename(path)
          key = split_filename(basename).first
          raise "Duplicate file #{basename}" if found[key]
          found[key] = path
        end
      end
    end
  end
end
|
@@ -0,0 +1,172 @@
|
|
1
|
+
module BibSync
  module Actions
    # Completes bibliography entries with metadata downloaded from arXiv,
    # doi.org and (for American Physical Society DOIs) the APS web site.
    class SynchronizeMetadata
      include Utils
      include Log

      # options[:bib]    - bibliography to update (required)
      # options[:resync] - when truthy, refresh entries that already have
      #                    title, author and year
      #
      # Raises RuntimeError if :bib is missing.
      def initialize(options)
        raise 'Bibliography must be set' unless @bib = options[:bib]
        @force = options[:resync]
      end

      # Walks over all non-comment entries, downloads missing metadata and
      # saves the bibliography after each entry.
      def run
        notice 'Synchronize with arXiv and DOI'

        @bib.each do |entry|
          next if entry.comment?

          if @force || !(entry[:title] && entry[:author] && entry[:year])
            determine_arxiv_and_doi(entry)

            if entry[:arxiv]
              # The key still carries the version: this is a freshly added file.
              if entry.key == arxiv_id(entry, :prefix => false, :version => true)
                entry = rename_arxiv_file(entry)
                next unless entry
              end
              update_arxiv(entry)
            end

            update_doi(entry) if entry[:doi]
          end

          # Only APS DOIs (prefix 10.1103) have an APS abstract page. Forcing
          # a download for other entries could only fail or wipe the abstract.
          if entry[:doi] =~ /\A10\.1103\// && (@force || !entry[:abstract])
            update_aps_abstract(entry)
          end

          @bib.save
        end
      end

      private

      # Returns the content of the first node matching the XPath, or nil.
      def first_content(node, path)
        node.xpath(path).map(&:content).first
      end

      # Returns the numeric version of an arXiv identifier ("...v12" => 12)
      # or nil if the identifier is missing or carries no version suffix.
      def arxiv_version(id)
        version = id.to_s[/v(\d+)$/, 1]
        version && version.to_i
      end

      # Downloads the abstract from the APS page of the entry's DOI. Failures
      # are logged and leave the entry untouched.
      def update_aps_abstract(entry)
        info("Downloading APS abstract", :key => entry)
        html = fetch_html("http://link.aps.org/doi/#{entry[:doi]}")
        abstract = html.css('.aps-abstractbox').map(&:content).first
        # Assigning nil would delete an existing abstract, so only store a
        # value that was actually found.
        entry[:abstract] = abstract if abstract
      rescue => ex
        error('Abstract download failed', :key => entry, :ex => ex)
      end

      # Downloads the BibTeX record for the entry's DOI from dx.doi.org and
      # copies its fields into the entry. On failure the :doi field is
      # removed and the error is logged.
      def update_doi(entry)
        info('Downloading doi.org metadata', :key => entry)
        text = fetch("http://dx.doi.org/#{entry[:doi]}", 'Accept' => 'text/bibliography; style=bibtex')
        raise text if text == 'Unknown DOI'
        Bibliography::Entry.parse(text).each {|k, v| entry[k] = v }
      rescue => ex
        entry.delete(:doi)
        error('doi download failed', :key => entry, :ex => ex)
      end

      # Rename arxiv file if key contains version
      #
      # Returns the entry to continue with (the already existing entry if
      # there is one for the version-less key), or nil if the existing entry
      # already has the same or a newer version; in that case the new file
      # is deleted.
      def rename_arxiv_file(entry)
        file = entry.file

        key = arxiv_id(entry, :prefix => false, :version => false)

        if old_entry = @bib[key]
          # Existing entry found
          @bib.delete(entry)
          old_version = arxiv_version(old_entry[:arxiv])
          new_version = arxiv_version(entry[:arxiv])
          # Versions are compared numerically; as strings "9" would be
          # considered newer than "10".
          if old_version && new_version && old_version >= new_version
            info('Not updating existing entry with older version', :key => old_entry)
            File.delete(file[:path]) if file
            return nil
          end

          old_entry[:arxiv] = entry[:arxiv]
          old_entry[:doi] = entry[:doi]
          entry = old_entry
          info('Updating existing entry', :key => entry)
        else
          # This is a new entry
          entry.key = key
        end

        if file
          new_path = file[:path].sub(arxiv_id(entry, :prefix => false, :version => true), key)
          File.rename(file[:path], new_path)
          entry.file = new_path
        end

        @bib.save

        entry
      end

      # Downloads the arXiv OAI record of the entry and copies title,
      # abstract, authors, date and related fields into it. On failure the
      # :arxiv field is removed and the error is logged.
      def update_arxiv(entry)
        info('Downloading arXiv metadata', :key => entry)
        xml = fetch_xml("http://export.arxiv.org/oai2?verb=GetRecord&identifier=oai:arXiv.org:#{arxiv_id(entry, :prefix => true, :version => false)}&metadataPrefix=arXiv")
        failure = first_content(xml, '//error')
        raise failure if failure

        entry[:title] = first_content(xml, '//arXiv/title')
        entry[:abstract] = first_content(xml, '//arXiv/abstract')
        # The first listed category is stored as primary class.
        entry[:primaryclass] = first_content(xml, '//arXiv/categories').split(/\s+/).first
        entry[:author] = xml.xpath('//arXiv/authors/author').map do |author|
          "{#{first_content(author, 'keyname')}}, {#{first_content(author, 'forenames')}}"
        end.join(' and ')
        entry[:journal] = ArXivJournal
        entry[:eprint] = entry[:arxiv]
        entry[:archiveprefix] = 'arXiv'
        # Prefer the date of the last update over the creation date.
        date = first_content(xml, '//arXiv/updated') || first_content(xml, '//arXiv/created')
        date = Date.parse(date)
        entry[:year] = date.year
        entry[:month] = Bibliography::RawValue.new(%w(jan feb mar apr may jun jul aug sep oct nov dec)[date.month - 1])
        doi = first_content(xml, '//arXiv/doi')
        entry[:doi] = doi if doi
        journal = first_content(xml, '//arXiv/journal-ref')
        entry[:journal] = journal if journal
        comments = first_content(xml, '//arXiv/comments')
        entry[:comments] = comments if comments
        entry[:url] = "http://arxiv.org/abs/#{entry[:arxiv]}"
      rescue => ex
        entry.delete(:arxiv)
        error('arXiv download failed', :key => entry, :ex => ex)
      end

      # Tries to find the arXiv identifier and DOI of an entry: first in the
      # text of the attached PDF, then in the file name, and finally (for the
      # arXiv identifier) by asking the arXiv API for the DOI.
      def determine_arxiv_and_doi(entry)
        if file = entry.file
          if file[:type] == :PDF && !entry[:arxiv] && !entry[:doi]
            debug('Searching for arXiv or doi identifier in pdf file', :key => entry)
            # Only the first two pages are converted to text.
            text = `pdftotext -f 1 -l 2 #{Shellwords.escape file[:path]} - 2>/dev/null`
            entry[:arxiv] = $1 if text =~ /arXiv:\s*([\w\.\/\-]+)/
            entry[:doi] = $1 if text =~ /doi:\s*([\w\.\/\-]+)/i
          end

          if !entry[:arxiv] && file[:name] =~ /^(\d+\.\d+v\d+)\.\w+$/
            debug('Interpreting file name as arXiv identifier', :key => entry)
            entry[:arxiv] = $1
          end

          if !entry[:doi] && file[:name] =~ /^(PhysRev.*?|RevModPhys.*?)\.\w+$/
            debug('Interpreting file name as doi identifier', :key => entry)
            entry[:doi] = "10.1103/#{$1}"
          end
        end

        if !entry[:arxiv] && entry[:doi]
          begin
            info('Fetch missing arXiv identifier', :key => entry)
            xml = fetch_xml("http://export.arxiv.org/api/query?search_query=doi:#{entry[:doi]}&max_results=1")
            # Accept the result only if it reports exactly the DOI asked for.
            if first_content(xml, '//entry/doi') == entry[:doi]
              id = first_content(xml, '//entry/id')
              if id =~ %r{\Ahttp://arxiv.org/abs/(.+)\Z}
                entry[:arxiv] = $1
              end
            end
          rescue => ex
            error('arXiv doi query failed', :ex => ex, :key => entry)
          end
        end

        unless entry[:arxiv] || entry[:doi]
          warning('No arXiv or doi identifier found', :key => entry)
        end
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module BibSync
  module Actions
    # Reports problems of bibliography entries as warnings: missing files or
    # fields, files whose name does not match the entry key, and entries
    # sharing an arXiv identifier or a title with an earlier entry.
    class Validate
      include Utils
      include Log

      # Fields every entry is expected to have.
      RequiredFields = [:title, :author, :year, :abstract]

      # options[:bib] - bibliography to validate (required)
      #
      # Raises RuntimeError if :bib is missing.
      def initialize(options)
        raise 'Bibliography must be set' unless @bib = options[:bib]
      end

      # Emits one warning per entry listing all problems found for it.
      def run
        notice 'Check validity'
        # First entry key seen for each title / version-less arXiv id.
        seen_titles = {}
        seen_arxivs = {}

        @bib.each do |entry|
          next if entry.comment?

          problems = []

          file = entry.file
          problems << 'Missing file' unless file && File.file?(file[:path])

          RequiredFields.each do |field|
            problems << "Missing #{field}" unless entry[field]
          end

          if file && split_filename(file[:name]).first != entry.key
            problems << 'Invalid file'
          end

          if entry[:arxiv]
            id = arxiv_id(entry, :version => false, :prefix => true)
            if seen_arxivs.key?(id)
              problems << "ArXiv duplicate of '#{seen_arxivs[id]}'"
            else
              seen_arxivs[id] = entry.key
            end
          end

          title = entry[:title]
          if title
            if seen_titles.key?(title)
              problems << "Title duplicate of '#{seen_titles[title]}'"
            else
              seen_titles[title] = entry.key
            end
          end

          warning(problems.join(', '), :key => entry) unless problems.empty?
        end
      end
    end
  end
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
require 'bibsync/actions/check_versions'
|
2
|
+
require 'bibsync/actions/synchronize_files'
|
3
|
+
require 'bibsync/actions/synchronize_metadata'
|
4
|
+
require 'bibsync/actions/validate'
|
5
|
+
require 'bibsync/actions/jabref_format'
|
6
|
+
require 'bibsync/actions/fetch_from_arxiv'
|
7
|
+
require 'bibsync/actions/find_my_citations'
|
@@ -0,0 +1,244 @@
|
|
1
|
+
module BibSync
  # In-memory representation of a BibTeX file: a collection of entries
  # indexed by key, with change tracking so that #save only writes the file
  # when something was modified.
  class Bibliography
    include Enumerable

    # Path of the bibliography file (nil if none was given).
    attr_reader :file

    # file - optional path; if it names an existing file, the file is parsed.
    def initialize(file = nil)
      @entries, @file = {}, file
      # File.exist? is used because File.exists? was removed in Ruby 3.2.
      parse(File.read(@file)) if @file && File.exist?(@file)
      # Parsing adds entries, which marks the bibliography dirty; a freshly
      # loaded bibliography counts as unmodified.
      @dirty = false
      @save_hooks = []
    end

    # Registers an object responding to #call(bibliography) that is invoked
    # before the file is written.
    def save_hook(hook)
      @save_hooks << hook
    end

    # Returns a truthy value if there are unsaved modifications.
    def dirty?
      @dirty
    end

    # Marks the bibliography as modified.
    def dirty!
      @dirty = true
    end

    # Returns the entry stored under key, or nil.
    def [](key)
      @entries[key]
    end

    # Removes the entry (looked up by its key) and detaches it from this
    # bibliography. Does nothing if no entry is stored under that key.
    def delete(entry)
      if @entries.include?(entry.key)
        @entries.delete(entry.key)
        entry.bibliography = nil
        dirty!
      end
    end

    # Returns the path of file relative to the directory containing the
    # bibliography file. Both paths are resolved with realpath and therefore
    # have to exist.
    #
    # Raises RuntimeError if the bibliography has no file name.
    def relative_path(file)
      raise 'No filename given' unless @file
      bibpath = Pathname.new(@file).realpath.parent
      Pathname.new(file).realpath.relative_path_from(bibpath).to_s
    end

    # Yields every entry.
    def each(&block)
      @entries.each_value(&block)
    end

    # Writes the bibliography if it was modified. When a file name is given
    # it becomes the new bibliography file and writing is forced.
    #
    # Returns true if the file was written, false otherwise.
    # Raises RuntimeError if no file name is known.
    def save(file = nil)
      if file
        @file = file
        @parent_path = nil
        @dirty = true
      end

      raise 'No filename given' unless @file
      if @dirty
        @save_hooks.each {|hook| hook.call(self) }
        # Write to a temporary file first and rename it over the target.
        File.open("#{@file}.tmp", 'w') {|f| f.write(self) }
        File.rename("#{@file}.tmp", @file)
        @dirty = false
        true
      else
        false
      end
    end

    # Adds the entry under its key (replacing an entry with the same key)
    # and attaches it to this bibliography.
    def <<(entry)
      entry.bibliography = self
      @entries[entry.key] = entry
      dirty!
    end

    # Parses BibTeX text and adds all entries found. Whitespace and lines
    # starting with % between entries are skipped.
    def parse(text)
      until text.empty?
        case text
        when /\A(\s+|%[^\n]+\n)/
          text = $'
        else
          entry = Entry.new
          text = entry.parse(text)
          # Entries without a key (comments) are stored under a running number.
          entry.key ||= @entries.size
          self << entry
        end
      end
    end

    # Returns the bibliography as BibTeX text with a timestamp and encoding
    # header.
    def to_s
      "% #{DateTime.now}\n% Encoding: UTF8\n\n" <<
        @entries.values.join("\n") << "\n"
    end

    # Field value that is written without surrounding braces.
    class RawValue < String; end

    # A single BibTeX entry: type, key and a hash of fields. Field names are
    # case-insensitive.
    class Entry
      include Enumerable

      attr_accessor :key, :type, :bibliography

      # Parses a single entry from text and returns it.
      def self.parse(text)
        entry = Entry.new
        entry.parse(text)
        entry
      end

      def initialize
        @fields = {}
      end

      # Stores the path of the attached document, relative to the
      # bibliography, in the :file field. Returns the given path.
      #
      # Raises RuntimeError if the entry belongs to no bibliography.
      def file=(file)
        raise 'No bibliography set' unless bibliography
        file =~ /\.(\w+)$/
        self[:file] = ":#{bibliography.relative_path(file)}:#{$1.upcase}" # JabRef file format "description:path:type"
        file
      end

      # Returns a hash with :name, :type and :path of the attached document,
      # or nil if the entry has no :file field.
      #
      # Raises RuntimeError if the entry belongs to no bibliography.
      def file
        if self[:file]
          raise 'No bibliography set' unless bibliography
          _description, file, type = self[:file].split(':', 3)
          path = (Pathname.new(bibliography.file).realpath.parent + file).to_s
          { :name => File.basename(path), :type => type.upcase.to_sym, :path => path }
        end
      end

      # Returns the value of a field, or nil.
      def [](key)
        @fields[convert_key(key)]
      end

      # Sets a field to the stripped string form of value; RawValue values
      # stay RawValue. A nil or false value deletes the field. The
      # bibliography is marked dirty only if something actually changed.
      def []=(key, value)
        if value then
          key = convert_key(key)
          value = RawValue === value ? RawValue.new(value.to_s.strip) : value.to_s.strip
          if @fields[key] != value || @fields[key].class != value.class
            @fields[key] = value
            dirty!
          end
        else
          delete(key)
        end
      end

      # Deletes a field if present.
      def delete(key)
        key = convert_key(key)
        if @fields.include?(key)
          @fields.delete(key)
          dirty!
        end
      end

      # Yields every field as name/value pair.
      def each(&block)
        @fields.each(&block)
      end

      # Returns true for @comment entries (any letter case).
      def comment?
        type.to_s.downcase == 'comment'
      end

      # Marks the owning bibliography (if any) as modified.
      def dirty!
        bibliography.dirty! if bibliography
      end

      # Returns the entry as BibTeX text.
      def to_s
        s = "@#{type}{"
        if comment?
          s << self[:comment]
        else
          s << "#{key},\n" << to_a.map {|k,v| RawValue === v ? " #{k} = #{v}" : " #{k} = {#{v}}" }.join(",\n") << "\n"
        end
        s << "}\n"
      end

      # Parses one entry from the beginning of text into this object.
      #
      # Returns the text remaining after the entry's closing brace.
      # Raises RuntimeError on malformed input.
      def parse(text)
        raise 'Unexpected token' if text !~ /\A\s*@(\w+)\s*\{/
        self.type = $1
        text = $'

        if comment?
          text, self[:comment] = parse_field(text)
        else
          raise 'Expected entry key' if text !~ /([^,]+),\s*/
          self.key = $1.strip
          text = $'

          until text.empty?
            case text
            when /\A(\s+|%[^\n]+\n)/
              text = $'
            when /\A\s*(\w+)\s*=\s*/
              text, key = $', $1
              if text =~ /\A\{/
                text, self[key] = parse_field(text)
              else
                # Values without braces are kept as raw values.
                text, value = parse_field(text)
                self[key] = RawValue.new(value)
              end
            else
              break
            end
          end
        end

        raise 'Expected closing }' unless text =~ /\A\s*\}/
        $'
      end

      private

      # Reads one field value from the beginning of text, keeping track of
      # nested braces. The outermost pair of braces is dropped. Reading stops
      # at a comma or closing brace outside of all braces.
      #
      # Returns the remaining text and the value.
      # Raises RuntimeError if braces are left open.
      def parse_field(text)
        # Unfrozen copy, because the value is built up with <<.
        value = ''.dup
        count = 0
        until text.empty?
          case text
          when /\A\{/
            text = $'
            value << $& if count > 0
            count += 1
          when /\A\}/
            # Closing brace of the entry itself: leave it for the caller.
            break if count == 0
            count -= 1
            text = $'
            value << $& if count > 0
          when /\A,/
            text = $'
            break if count == 0
            value << $&
          when /\A[^\}\{,]+/
            text = $'
            value << $&
          else
            break
          end
        end

        raise 'Expected closing }' if count != 0

        return text, value
      end

      # Normalizes a field name to a lower-case symbol.
      def convert_key(key)
        key.to_s.downcase.to_sym
      end
    end
  end
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'bibsync'
|
2
|
+
require 'optparse'
|
3
|
+
|
4
|
+
module BibSync
  # Command line front end: parses the options and runs the selected actions.
  class Command
    # args - array of command line arguments (consumed while parsing)
    def initialize(args)
      @args = args
      @options = {}
    end

    # Parses the arguments, runs the actions and terminates the process with
    # status 0 on success or 1 on error.
    def run
      @opts = OptionParser.new(&method(:set_opts))
      @opts.parse!(@args)
      process
      exit 0
    rescue Exception => ex
      # Rescuing Exception is deliberate at this top level; exit requests are
      # passed through untouched, and so is everything else when --trace is set.
      raise ex if SystemExit === ex || Log.trace?
      $stderr.print "#{ex.class}: " if ex.class != RuntimeError
      $stderr.puts ex.message
      $stderr.puts ' Use --trace for backtrace.'
      exit 1
    end

    private

    # Declares all command line options on the given OptionParser.
    def set_opts(opts)
      opts.banner = "Usage: #{$0} [options]"

      opts.on('-b', '--bib bibfile.bib', 'Set bibliography') do |bib|
        @options[:bib] = bib
      end

      opts.on('-d', '--directory directory', 'Set directory') do |dir|
        @options[:dir] = dir
      end

      opts.on('-v', '--check-versions', 'Check for updated arXiv papers') do
        @options[:check_versions] = true
      end

      opts.on('-u', '--update', 'Update arXiv papers') do
        @options[:update] = true
      end

      opts.on('-s', '--sync', 'Synchronize missing metadata') do
        @options[:sync] = true
      end

      opts.on('-r', '--resync', 'Force synchronization with arXiv and DOI') do
        @options[:resync] = true
      end

      opts.on('-m', '--citedbyme directory', 'Find my citations in my TeX files') do |dir|
        @options[:citedbyme] = dir
      end

      opts.on('-j', '--jabref', 'Format bibliography file with JabRef') do
        @options[:jabref] = true
      end

      # May be given several times; the URLs are collected.
      opts.on('-f', '--fetch url', 'Fetch arXiv paper as PDF file') do |url|
        (@options[:fetch] ||= []) << url
      end

      opts.on('-V', '--verbose', 'Verbose output') do
        Log.verbose!
      end

      opts.on('--trace', 'Show a full traceback on error') do
        Log.trace!
      end

      opts.on('-h', '--help', 'Display this help') do
        puts opts
        exit
      end

      opts.on('--version', 'Display version information') do
        puts "BibSync Version #{BibSync::VERSION}"
        exit
      end
    end

    # Loads the bibliography and runs the actions selected by the options.
    # Validation always runs.
    def process
      if @args.size != 0
        # This class does not include Log, so the message is written
        # directly instead of through the logging helpers.
        $stderr.puts 'Too many arguments'
        $stderr.puts @opts
        exit 1
      end

      if @options[:bib]
        @options[:bib] = Bibliography.new(@options[:bib])
        @options[:bib].save_hook(Transformer.new)
      end

      actions = []
      actions << :FetchFromArXiv if @options[:fetch]
      actions << :CheckVersions if @options[:check_versions] || @options[:update]
      actions << :SynchronizeFiles << :SynchronizeMetadata if @options[:sync] || @options[:resync]
      actions << :FindMyCitations if @options[:citedbyme]
      actions << :Validate
      actions << :JabrefFormat if @options[:jabref]

      # All actions are instantiated (and thereby check their options)
      # before the first one runs.
      actions.map {|a| Actions.const_get(a).new(@options) }.each {|a| a.run }
    end
  end
end
|
data/lib/bibsync/log.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
module BibSync
  # Logging helpers printing (optionally colored) messages to standard
  # output. Verbosity and backtrace output are process-wide switches.
  module Log
    # ANSI escape sequences
    Reset = "\e[0m"
    Red = "\e[31m"
    Yellow = "\e[33m"
    Blue = "\e[36m"

    class << self
      # Truthy once verbose! was called.
      def verbose?
        @verbose
      end

      # Enables debug messages.
      def verbose!
        @verbose = true
      end

      # Truthy once trace! was called.
      def trace?
        @trace
      end

      # Enables printing of exception backtraces.
      def trace!
        @trace = true
      end
    end

    # Prints the message only in verbose mode.
    def debug(message, opts = {})
      info(message, opts) if Log.verbose?
    end

    # Prints the message without color.
    def info(message, opts = {})
      log(message, opts)
    end

    # Prints the message in blue.
    def notice(message, opts = {})
      log(message, opts.merge(:color => Blue))
    end

    # Prints the message in yellow.
    def warning(message, opts = {})
      log(message, opts.merge(:color => Yellow))
    end

    # Prints the message in red.
    def error(message, opts = {})
      log(message, opts.merge(:color => Red))
    end

    # Prints a message.
    #
    # opts[:ex]    - exception; its message is appended and, in trace mode,
    #                its backtrace is printed after the message
    # opts[:color] - escape sequence wrapped around the message
    # opts[:key]   - prefix; objects responding to #key contribute their key
    def log(message, opts = {})
      exception = opts[:ex]
      color = opts[:color]
      prefix = opts[:key]

      message = "#{message} - #{exception.message}" if exception
      message = "#{color}#{message}#{Reset}" if color
      if prefix
        prefix = prefix.key if prefix.respond_to? :key
        message = "#{prefix} : #{message}"
      end
      puts(message)

      puts(exception.backtrace.join("\n")) if Log.trace? && exception
    end
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module BibSync
  # Save hook normalizing the fields of all non-comment entries: author
  # list, APS publisher data, month, and journal name together with the
  # volume, pages and year embedded in it.
  class Transformer
    include Utils

    # Patterns extracting the year, page and volume numbers from a journal
    # reference. The Europhysics Letters variants do not accept a comma
    # after the number.
    YearPattern = /\((\d{4})\)/
    EplPagesPattern = / (\d{5,10})( |\Z)/
    EplVolumePattern = / (\d{2,4})( |\Z)/
    PagesPattern = / (\d{5,10})( |,|\Z)/
    VolumePattern = / (\d{2,4})( |,|\Z)/

    # Normalizes every entry of the bibliography in place.
    def call(bib)
      bib.each do |entry|
        next if entry.comment?

        normalize_author(entry)
        normalize_publisher(entry)
        normalize_month(entry)
        normalize_journal(entry)
      end
    end

    private

    # Removes braces around single words and turns '#' into ' and '.
    def normalize_author(entry)
      return unless entry[:author]
      entry[:author] = entry[:author].gsub(/\{(\w+)\}/, '\\1').gsub(/#/, ' and ')
    end

    # Fills in publisher and pages for Physical Review style DOIs (without
    # overwriting existing values) and unifies the publisher's spelling.
    def normalize_publisher(entry)
      if entry[:doi] && entry[:doi] =~ /(PhysRev|RevModPhys).*?\.(\d+)$/
        pages = $2
        entry[:publisher] ||= 'American Physical Society'
        entry[:pages] ||= pages
      end

      if entry[:publisher] && entry[:publisher] =~ /American Physical Society/i
        entry[:publisher] = 'American Physical Society'
      end
    end

    # Stores the month in lower case as raw value.
    def normalize_month(entry)
      return unless entry[:month]
      entry[:month] = Bibliography::RawValue.new(entry[:month].downcase)
    end

    # Expands known journal abbreviations, moves the numbers embedded in
    # such references to their own fields and sets the short journal name.
    def normalize_journal(entry)
      return unless entry[:journal]

      if entry[:journal] =~ /EPL/
        extract_reference(entry, EplPagesPattern, EplVolumePattern)
        entry[:journal] = 'Europhysics Letters'
      end

      if entry[:journal] =~ /(Phys\.|Physical) (Rev\.|Review) Lett[^ ]+ /
        extract_reference(entry, PagesPattern, VolumePattern)
        entry[:journal] = 'Physical Review Letters'
      end

      if entry[:journal] =~ /(Phys\.|Physical) (Rev\.|Review) (\w) /
        letter = $3
        extract_reference(entry, PagesPattern, VolumePattern)
        entry[:journal] = "Physical Review #{letter}"
      end

      entry[:shortjournal] =
        case entry[:journal]
        when /\APhysical Review (\w)\Z/i
          "PR#{$1.upcase}"
        when /\APhysical Review Letters\Z/i
          'PRL'
        when /\AReviews of Modern Physics\Z/i
          'RMP'
        when /\ANew Journal of Physics\Z/i
          'NJP'
        when /\A#{ArXivJournal}\Z/i
          'arXiv'
        when /\AEurophysics Letters\Z/i
          'EPL'
        else
          entry[:journal]
        end
    end

    # Copies year, pages and volume found in the journal field to the
    # corresponding fields; fields without a match stay untouched.
    def extract_reference(entry, pages_pattern, volume_pattern)
      reference = entry[:journal]
      entry[:year] = $1 if reference =~ YearPattern
      entry[:pages] = $1 if reference =~ pages_pattern
      entry[:volume] = $1 if reference =~ volume_pattern
    end
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
module BibSync
  # Helpers shared by the actions: file name handling, downloads via curl
  # and arXiv identifier manipulation.
  module Utils
    # Journal name used for papers that are only available on arXiv.
    ArXivJournal = 'ArXiv e-prints'

    # Splits a file name into base name and upper-cased extension.
    #
    #   split_filename('paper.pdf') # => ['paper', 'PDF']
    #
    # A name without extension is returned unchanged together with nil
    # instead of failing.
    def split_filename(file)
      if file =~ /^(.*?)\.(\w+)$/
        [$1, $2.upcase]
      else
        [file, nil]
      end
    end

    # Downloads url with curl (following redirects) and returns the body.
    #
    # headers - hash of additional request headers
    #
    # Raises RuntimeError with curl's output if curl fails.
    def fetch(url, headers = {})
      # open(url, headers) {|f| f.read }
      headers = headers.map {|k,v| '-H ' + Shellwords.escape("#{k}: #{v}") }.join(' ')
      result = `curl --stderr - -S -s -L #{headers} #{Shellwords.escape url}`
      raise result.chomp unless $?.success?
      result
    end

    # Downloads the PDF of the arXiv paper id into dir. The file is named
    # after the identifier including its version but without prefix.
    #
    # Raises RuntimeError with curl's output if curl fails.
    def arxiv_download(dir, id)
      url = "http://arxiv.org/pdf/#{id}"
      file = File.join(dir, "#{arxiv_id(id, :version => true, :prefix => false)}.pdf")
      result = `curl --stderr - -S -s -L -o #{Shellwords.escape file} #{Shellwords.escape url}`
      raise result.chomp unless $?.success?
    end

    # Downloads url and returns it as Nokogiri XML document with all
    # namespaces removed.
    def fetch_xml(url, headers = {})
      xml = Nokogiri::XML(fetch(url, headers))
      xml.remove_namespaces!
      xml
    end

    # Downloads url and returns it as Nokogiri HTML document.
    def fetch_html(url, headers = {})
      Nokogiri::HTML(fetch(url, headers))
    end

    # Returns an arXiv identifier in the requested form.
    #
    # arxiv - identifier string, or an entry whose :arxiv field is used
    # opts  - both keys are mandatory:
    #         :prefix  - keep everything up to the last slash (archive name)
    #         :version - keep the version suffix ("v2")
    #
    # Returns nil if there is no identifier.
    # Raises RuntimeError if one of the options is missing.
    def arxiv_id(arxiv, opts = {})
      unless opts.include?(:prefix) && opts.include?(:version)
        raise 'arxiv_id requires the :prefix and :version options'
      end
      arxiv = arxiv[:arxiv] if Bibliography::Entry === arxiv
      if arxiv
        arxiv = arxiv.sub(/^.*\//, '') unless opts[:prefix]
        arxiv = arxiv.sub(/v\d+$/, '') unless opts[:version]
        arxiv
      end
    end
  end
end
|
data/lib/bibsync.rb
ADDED
metadata
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bibsync
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Daniel Mendler
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-04-04 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: BibSync is a tool to synchronize scientific papers and bibtex bibliography
|
31
|
+
files
|
32
|
+
email:
|
33
|
+
- mail@daniel-mendler.de
|
34
|
+
executables:
|
35
|
+
- bibsync
|
36
|
+
extensions: []
|
37
|
+
extra_rdoc_files: []
|
38
|
+
files:
|
39
|
+
- .gitignore
|
40
|
+
- bibsync.gemspec
|
41
|
+
- bin/bibsync
|
42
|
+
- lib/bibsync.rb
|
43
|
+
- lib/bibsync/actions.rb
|
44
|
+
- lib/bibsync/actions/check_versions.rb
|
45
|
+
- lib/bibsync/actions/fetch_from_arxiv.rb
|
46
|
+
- lib/bibsync/actions/find_my_citations.rb
|
47
|
+
- lib/bibsync/actions/jabref_format.rb
|
48
|
+
- lib/bibsync/actions/synchronize_files.rb
|
49
|
+
- lib/bibsync/actions/synchronize_metadata.rb
|
50
|
+
- lib/bibsync/actions/validate.rb
|
51
|
+
- lib/bibsync/bibliography.rb
|
52
|
+
- lib/bibsync/command.rb
|
53
|
+
- lib/bibsync/log.rb
|
54
|
+
- lib/bibsync/transformer.rb
|
55
|
+
- lib/bibsync/utils.rb
|
56
|
+
- lib/bibsync/version.rb
|
57
|
+
homepage: https://github.com/minad/bibsync
|
58
|
+
licenses: []
|
59
|
+
post_install_message:
|
60
|
+
rdoc_options: []
|
61
|
+
require_paths:
|
62
|
+
- lib
|
63
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
64
|
+
none: false
|
65
|
+
requirements:
|
66
|
+
- - ! '>='
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
70
|
+
none: false
|
71
|
+
requirements:
|
72
|
+
- - ! '>='
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
75
|
+
requirements: []
|
76
|
+
rubyforge_project: bibsync
|
77
|
+
rubygems_version: 1.8.24
|
78
|
+
signing_key:
|
79
|
+
specification_version: 3
|
80
|
+
summary: BibSync is a tool to synchronize scientific papers and bibtex bibliography
|
81
|
+
files
|
82
|
+
test_files: []
|