bibsync 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *.swp
2
+ *.gem
3
+ Gemfile.lock
4
+ .bundle
5
+ .yardoc
6
+
data/bibsync.gemspec ADDED
@@ -0,0 +1,21 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for bibsync. The version constant is loaded straight from
# the source tree so the gemspec works before the gem is installed.
require File.dirname(__FILE__) + '/lib/bibsync/version'
require 'date'

Gem::Specification.new do |spec|
  # Identity
  spec.name = 'bibsync'
  spec.version = BibSync::VERSION
  spec.date = Date.today.to_s

  # Author and project information
  spec.authors = ['Daniel Mendler']
  spec.email = ['mail@daniel-mendler.de']
  spec.summary = 'BibSync is a tool to synchronize scientific papers and bibtex bibliography files'
  spec.description = 'BibSync is a tool to synchronize scientific papers and bibtex bibliography files'
  spec.homepage = 'https://github.com/minad/bibsync'
  spec.rubyforge_project = spec.name

  # Package contents: everything tracked by git; executables are the
  # base names of the tracked files below bin/.
  tracked_files = `git ls-files`.split("\n")
  tracked_binaries = `git ls-files -- bin/*`.split("\n")
  spec.files = tracked_files
  spec.executables = tracked_binaries.map { |path| File.basename(path) }
  spec.require_paths = %w(lib)

  # Runtime dependencies
  spec.add_runtime_dependency('nokogiri')
end
data/bin/bibsync ADDED
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby

# Command-line entry point: hands the raw argument vector to BibSync::Command.
require 'bibsync/command'

command = BibSync::Command.new(ARGV)
command.run
@@ -0,0 +1,35 @@
1
module BibSync
  module Actions
    # Queries the arXiv API for every bibliography entry carrying an :arxiv
    # field and reports (optionally downloads) versions that differ from the
    # one recorded in the bibliography.
    class CheckVersions
      include Log
      include Utils

      # Number of identifiers sent to the arXiv API per request.
      SliceSize = 20

      # options - Hash read for :bib (required), :dir (required) and :update.
      #
      # Raises RuntimeError when :bib or :dir is missing.
      def initialize(options)
        raise 'Bibliography must be set' unless @bib = options[:bib]
        raise 'Directory must be set' unless @dir = options[:dir]
        @update = options[:update]
      end

      # Checks all arXiv entries in slices of SliceSize. A failing request is
      # logged and does not abort the remaining slices.
      def run
        notice 'Check for newer version on arXiv'
        @bib.select {|e| e[:arxiv] }.each_slice(SliceSize) do |entries|
          begin
            check_slice(entries)
          rescue => ex
            error('arXiv query failed', :ex => ex)
          end
        end
      end

      private

      # Queries one slice of entries and reports every returned identifier
      # that differs from the identifier stored in the matching entry.
      #
      # Results are matched to entries by their version-less identifier and
      # not by position, so a missing or reordered result can no longer be
      # compared against the wrong entry.
      def check_slice(entries)
        by_id = entries.group_by {|e| arxiv_id(e, :version => false, :prefix => true) }
        xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{by_id.keys.join(',')}&max_results=#{SliceSize}")
        xml.xpath('//entry/id').map(&:content).each do |id|
          id = id.gsub('http://arxiv.org/abs/', '')
          matching = by_id[arxiv_id(id, :version => false, :prefix => true)] || []
          matching.each do |entry|
            next if id == entry[:arxiv]
            info("#{entry[:arxiv]} replaced by http://arxiv.org/pdf/#{id}", :key => entry)
            arxiv_download(@dir, id) if @update
          end
        end
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
module BibSync
  module Actions
    # Downloads papers given as arXiv abstract URLs into the target directory
    # and opens every other URL in the desktop browser.
    class FetchFromArXiv
      # Number of identifiers sent to the arXiv API per request.
      SliceSize = 20

      # Matches an arXiv abstract URL (http or https) with a new-style
      # identifier and an optional version suffix; group 1 is the identifier.
      # Anchored on the whole string and with the dots escaped.
      ArXivUrl = %r{\Ahttps?://arxiv\.org/abs/(\d+\.\d+(?:v\d+)?)\z}

      include Log
      include Utils

      # options - Hash read for :fetch (required list of URLs) and :dir
      #           (required download directory).
      #
      # Raises RuntimeError when :fetch or :dir is missing.
      def initialize(options)
        raise 'Fetch must be set' unless @fetch = options[:fetch]
        raise 'Directory must be set' unless @dir = options[:dir]
      end

      # Splits the requested URLs into arXiv identifiers and other URLs, opens
      # the latter with xdg-open and downloads the former in slices.
      def run
        ids = []
        urls = []

        @fetch.each do |url|
          if url =~ ArXivUrl
            ids << $1
          else
            urls << url
          end
        end

        unless urls.empty?
          notice 'Starting browser for non-arXiv urls'
          urls.each do |url|
            info "Opening #{url}"
            `xdg-open #{Shellwords.escape url}`
          end
        end

        unless ids.empty?
          notice 'Downloading from arXiv'
          # The block parameter no longer shadows the outer `ids` array.
          ids.each_slice(SliceSize) do |slice|
            begin
              xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{slice.join(',')}&max_results=#{SliceSize}")
              xml.xpath('//entry/id').map(&:content).each do |id|
                id = id.gsub('http://arxiv.org/abs/', '')
                info 'arXiv download', :key => id
                arxiv_download(@dir, id)
              end
            rescue => ex
              # A failing slice is logged; remaining slices are still tried.
              error('arXiv query failed', :ex => ex)
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
module BibSync
  module Actions
    # Scans a directory of TeX files for cite{...} commands and records, per
    # bibliography entry, the files that cite it in the :citedbyme field.
    class FindMyCitations
      include Log
      include Utils

      # options - Hash read for :bib (required) and :citedbyme (required TeX
      #           directory).
      #
      # Raises RuntimeError when an option is missing or :citedbyme is not a
      # directory.
      def initialize(options)
        raise 'Bibliography must be set' unless @bib = options[:bib]
        raise 'Tex directory must be set' unless @dir = options[:citedbyme]
        raise "#{@dir} is not a directory" unless File.directory?(@dir)
      end

      # Collects the citations, updates the bibliography and saves it.
      def run
        notice 'Find citations in my TeX files'

        cites = {}
        Dir[File.join(@dir, '**/*.tex')].each do |file|
          # Resolved once per file, and only when the file cites something.
          # The original reassigned `file` inside the key loop, so the second
          # key of a file was resolved from an already-relative path.
          relative = nil
          File.read(file).scan(/cite\{([^\}]+)\}/) do |match|
            match.first.split(/\s*,\s*/).each do |key|
              key = key.strip
              relative ||= @bib.relative_path(file)
              debug("Cited in #{relative}", :key => key)
              (cites[key] ||= []) << relative
            end
          end
        end

        # Drop stale markers from entries that are no longer cited. The field
        # written below is :citedbyme, so that is the field to clear (the
        # original removed :cites, which this action never writes).
        @bib.each do |entry|
          next if entry.comment?
          entry.delete(:citedbyme) unless cites.include?(entry.key)
        end

        cites.each do |key, files|
          files = files.sort.uniq.join(';')
          if @bib[key]
            @bib[key][:citedbyme] = files
          else
            warning("Cited in #{files} but not found in #{@bib.file}", :key => key)
          end
        end

        @bib.save
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module BibSync
  module Actions
    # Rewrites the bibliography file through JabRef unless its first bytes
    # already mention JabRef.
    class JabrefFormat
      include Utils
      include Log

      # options - Hash read for :bib (required).
      #
      # Raises RuntimeError when :bib is missing.
      def initialize(options)
        raise 'Bibliography must be set' unless @bib = options[:bib]
      end

      # Saves the bibliography, then lets JabRef import and re-export it.
      # The original file is replaced only if JabRef produced an output file.
      def run
        @bib.save
        if File.read(@bib.file, 80) !~ /JabRef/
          notice 'Transforming file with JabRef'
          tmp_file = "#{@bib.file}.tmp.bib"
          `jabref --nogui --import #{Shellwords.escape @bib.file} --output #{Shellwords.escape tmp_file} 2>&1 >/dev/null`
          # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
          File.rename(tmp_file, @bib.file) if File.exist?(tmp_file)
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
module BibSync
  module Actions
    # Makes sure every document file found below the directory has a
    # bibliography entry whose key is the file name without its extension.
    class SynchronizeFiles
      include Utils
      include Log

      # File extensions that are picked up from the directory.
      FileTypes = %w(djvu pdf ps)

      # options - Hash read for :bib and :dir, both required.
      def initialize(options)
        @bib = options[:bib]
        raise 'Bibliography must be set' unless @bib
        @dir = options[:dir]
        raise 'Directory must be set' unless @dir
      end

      # Indexes the files by key (aborting on duplicates), creates entries
      # for unknown keys, points every entry at its file and saves.
      def run
        notice 'Synchronize with files'

        pattern = File.join(@dir, "**/*.{#{FileTypes.join(',')}}")
        by_key = {}
        Dir[pattern].sort.each do |path|
          basename = File.basename(path)
          key = split_filename(basename).first
          raise "Duplicate file #{basename}" if by_key[key]
          by_key[key] = path
        end

        by_key.each do |key, path|
          entry = @bib[key]
          unless entry
            info('New file', :key => key)
            entry = Bibliography::Entry.new
            entry.key = key
            @bib << entry
          end

          entry.type ||= :ARTICLE
          entry.file = path
        end

        @bib.save
      end
    end
  end
end
@@ -0,0 +1,172 @@
1
module BibSync
  module Actions
    # Completes bibliography entries with metadata downloaded from arXiv,
    # doi.org and (for APS DOIs) the APS abstract page.
    class SynchronizeMetadata
      include Utils
      include Log

      # options - Hash read for :bib (required) and :resync (refresh entries
      #           even when title, author and year are already present).
      #
      # Raises RuntimeError when :bib is missing.
      def initialize(options)
        raise 'Bibliography must be set' unless @bib = options[:bib]
        @force = options[:resync]
      end

      # Walks all non-comment entries, determines their identifiers, updates
      # the metadata and saves the bibliography after each entry.
      def run
        notice 'Synchronize with arXiv and DOI'

        @bib.each do |entry|
          next if entry.comment?

          if @force || !(entry[:title] && entry[:author] && entry[:year])
            determine_arxiv_and_doi(entry)

            if entry[:arxiv]
              # A key that still carries the version marks a freshly
              # downloaded file which has to be renamed/merged first.
              if entry.key == arxiv_id(entry, :prefix => false, :version => true)
                entry = rename_arxiv_file(entry)
                next unless entry
              end
              update_arxiv(entry)
            end

            update_doi(entry) if entry[:doi]
          end

          # Only APS DOIs have an APS abstract page. Previously a forced
          # resync queried the page for every entry (even without a DOI) and
          # a failed lookup removed the existing abstract.
          if entry[:doi] =~ /\A10\.1103\// && (@force || !entry[:abstract])
            update_aps_abstract(entry)
          end

          @bib.save
        end
      end

      private

      # Downloads the abstract from the APS page of the entry's DOI.
      # The stored abstract is kept when the page contains none.
      def update_aps_abstract(entry)
        info("Downloading APS abstract", :key => entry)
        html = fetch_html("http://link.aps.org/doi/#{entry[:doi]}")
        abstract = html.css('.aps-abstractbox').map(&:content).first
        entry[:abstract] = abstract if abstract
      rescue => ex
        error('Abstract download failed', :key => entry, :ex => ex)
      end

      # Merges the BibTeX record served by dx.doi.org into the entry.
      # On failure the :doi field is removed and the error is logged.
      def update_doi(entry)
        info('Downloading doi.org metadata', :key => entry)
        text = fetch("http://dx.doi.org/#{entry[:doi]}", 'Accept' => 'text/bibliography; style=bibtex')
        raise text if text == 'Unknown DOI'
        Bibliography::Entry.parse(text).each {|k, v| entry[k] = v }
      rescue => ex
        entry.delete(:doi)
        error('doi download failed', :key => entry, :ex => ex)
      end

      # Rename arxiv file if key contains version
      #
      # Returns the entry to continue with (the pre-existing entry when one
      # with the version-less key exists), or nil when the existing entry
      # already has the same or a newer version.
      def rename_arxiv_file(entry)
        file = entry.file

        key = arxiv_id(entry, :prefix => false, :version => false)

        if old_entry = @bib[key]
          # Existing entry found
          @bib.delete(entry)
          old_version = old_entry[:arxiv][/v(\d+)$/, 1]
          new_version = entry[:arxiv][/v(\d+)$/, 1]
          # Compare numerically: as strings "9" would sort after "10".
          if old_version && new_version && old_version.to_i >= new_version.to_i
            info('Not updating existing entry with older version', :key => old_entry)
            File.delete(file[:path]) if file
            return nil
          end

          old_entry[:arxiv] = entry[:arxiv]
          old_entry[:doi] = entry[:doi]
          entry = old_entry
          info('Updating existing entry', :key => entry)
        else
          # This is a new entry
          entry.key = key
        end

        if file
          new_path = file[:path].sub(arxiv_id(entry, :prefix => false, :version => true), key)
          File.rename(file[:path], new_path)
          entry.file = new_path
        end

        @bib.save

        entry
      end

      # Fills the entry from the arXiv OAI record of its identifier.
      # On failure the :arxiv field is removed and the error is logged.
      def update_arxiv(entry)
        info('Downloading arXiv metadata', :key => entry)
        xml = fetch_xml("http://export.arxiv.org/oai2?verb=GetRecord&identifier=oai:arXiv.org:#{arxiv_id(entry, :prefix => true, :version => false)}&metadataPrefix=arXiv")
        # Named api_error so it does not shadow the `error` logging method.
        api_error = xml.xpath('//error').map(&:content).first
        raise api_error if api_error

        entry[:title] = xml.xpath('//arXiv/title').map(&:content).first
        entry[:abstract] = xml.xpath('//arXiv/abstract').map(&:content).first
        entry[:primaryclass] = xml.xpath('//arXiv/categories').map(&:content).first.split(/\s+/).first
        entry[:author] = xml.xpath('//arXiv/authors/author').map do |author|
          "{#{author.xpath('keyname').map(&:content).first}}, {#{author.xpath('forenames').map(&:content).first}}"
        end.join(' and ')
        entry[:journal] = ArXivJournal
        entry[:eprint] = entry[:arxiv]
        entry[:archiveprefix] = 'arXiv'
        date = xml.xpath('//arXiv/updated').map(&:content).first || xml.xpath('//arXiv/created').map(&:content).first
        date = Date.parse(date)
        entry[:year] = date.year
        entry[:month] = Bibliography::RawValue.new(%w(jan feb mar apr may jun jul aug sep oct nov dec)[date.month - 1])
        doi = xml.xpath('//arXiv/doi').map(&:content).first
        entry[:doi] = doi if doi
        # A published journal reference overrides the generic arXiv journal.
        journal = xml.xpath('//arXiv/journal-ref').map(&:content).first
        entry[:journal] = journal if journal
        comments = xml.xpath('//arXiv/comments').map(&:content).first
        entry[:comments] = comments if comments
        entry[:url] = "http://arxiv.org/abs/#{entry[:arxiv]}"
      rescue => ex
        entry.delete(:arxiv)
        error('arXiv download failed', :key => entry, :ex => ex)
      end

      # Tries to find the arXiv identifier and DOI of an entry: first in the
      # text of the attached PDF, then in the file name, finally by asking
      # arXiv for the paper that belongs to a known DOI.
      def determine_arxiv_and_doi(entry)
        if file = entry.file
          if file[:type] == :PDF && !entry[:arxiv] && !entry[:doi]
            debug('Searching for arXiv or doi identifier in pdf file', :key => entry)
            # Only the first two pages are converted to text.
            text = `pdftotext -f 1 -l 2 #{Shellwords.escape file[:path]} - 2>/dev/null`
            entry[:arxiv] = $1 if text =~ /arXiv:\s*([\w\.\/\-]+)/
            entry[:doi] = $1 if text =~ /doi:\s*([\w\.\/\-]+)/i
          end

          # The dot of the identifier is escaped so that only names such as
          # 1234.5678v2.pdf are treated as arXiv identifiers.
          if !entry[:arxiv] && file[:name] =~ /\A(\d+\.\d+v\d+)\.\w+\z/
            debug('Interpreting file name as arXiv identifier', :key => entry)
            entry[:arxiv] = $1
          end

          if !entry[:doi] && file[:name] =~ /\A(PhysRev.*?|RevModPhys.*?)\.\w+\z/
            debug('Interpreting file name as doi identifier', :key => entry)
            entry[:doi] = "10.1103/#{$1}"
          end
        end

        if !entry[:arxiv] && entry[:doi]
          begin
            info('Fetch missing arXiv identifier', :key => entry)
            xml = fetch_xml("http://export.arxiv.org/api/query?search_query=doi:#{entry[:doi]}&max_results=1")
            # Accept the hit only when it reports exactly the DOI asked for.
            if xml.xpath('//entry/doi').map(&:content).first == entry[:doi]
              id = xml.xpath('//entry/id').map(&:content).first
              if id =~ %r{\Ahttp://arxiv.org/abs/(.+)\Z}
                entry[:arxiv] = $1
              end
            end
          rescue => ex
            error('arXiv doi query failed', :ex => ex, :key => entry)
          end
        end

        unless entry[:arxiv] || entry[:doi]
          warning('No arXiv or doi identifier found', :key => entry)
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
module BibSync
  module Actions
    # Reports entries with missing files or fields, files whose name does not
    # match the entry key, and duplicated arXiv identifiers or titles.
    class Validate
      include Utils
      include Log

      # Fields every entry is expected to carry.
      RequiredFields = [:title, :author, :year, :abstract]

      # options - Hash read for :bib (required).
      def initialize(options)
        @bib = options[:bib]
        raise 'Bibliography must be set' unless @bib
      end

      # Emits one warning per entry listing all problems found for it.
      def run
        notice 'Check validity'
        seen_titles = {}
        seen_arxivs = {}

        @bib.each do |entry|
          next if entry.comment?

          problems = []

          attachment = entry.file
          problems << 'Missing file' unless attachment && File.file?(attachment[:path])

          RequiredFields.each do |field|
            problems << "Missing #{field}" unless entry[field]
          end

          if attachment && split_filename(attachment[:name]).first != entry.key
            problems << 'Invalid file'
          end

          if entry[:arxiv]
            id = arxiv_id(entry, :version => false, :prefix => true)
            if seen_arxivs.include?(id)
              problems << "ArXiv duplicate of '#{seen_arxivs[id]}'"
            else
              # The first entry seen with an identifier owns it.
              seen_arxivs[id] = entry.key
            end
          end

          title = entry[:title]
          if title
            if seen_titles.include?(title)
              problems << "Title duplicate of '#{seen_titles[title]}'"
            else
              seen_titles[title] = entry.key
            end
          end

          warning(problems.join(', '), :key => entry) unless problems.empty?
        end
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require 'bibsync/actions/check_versions'
2
+ require 'bibsync/actions/synchronize_files'
3
+ require 'bibsync/actions/synchronize_metadata'
4
+ require 'bibsync/actions/validate'
5
+ require 'bibsync/actions/jabref_format'
6
+ require 'bibsync/actions/fetch_from_arxiv'
7
+ require 'bibsync/actions/find_my_citations'
@@ -0,0 +1,244 @@
1
module BibSync
  # In-memory model of a BibTeX file: a key-indexed collection of entries
  # that is parsed from and written back to a single file.
  class Bibliography
    include Enumerable

    # Skippable input: whitespace, or a %-comment up to the end of its line.
    # The comment may be empty and may end at the end of the input.
    Ignorable = /\A(?:\s+|%[^\n]*(?:\n|\z))/

    attr_reader :file

    # file - optional path of the bibliography; parsed when it exists.
    def initialize(file = nil)
      @entries, @file = {}, file
      @save_hooks = []
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      parse(File.read(@file)) if @file && File.exist?(@file)
      # Parsing adds entries and marks the object dirty; a freshly loaded
      # bibliography is clean by definition.
      @dirty = false
    end

    # Registers an object responding to call(bibliography) that is invoked
    # right before the bibliography is written.
    def save_hook(hook)
      @save_hooks << hook
    end

    # True when there are changes that have not been saved.
    def dirty?
      @dirty
    end

    # Marks the bibliography as modified.
    def dirty!
      @dirty = true
    end

    # Returns the entry stored under key, or nil.
    def [](key)
      @entries[key]
    end

    # Removes the entry (looked up by its key) and detaches it.
    def delete(entry)
      if @entries.include?(entry.key)
        @entries.delete(entry.key)
        entry.bibliography = nil
        dirty!
      end
    end

    # Returns file expressed relative to the directory of the bibliography
    # file. Both paths are resolved with realpath and therefore must exist.
    #
    # Raises RuntimeError when the bibliography has no file name.
    def relative_path(file)
      raise 'No filename given' unless @file
      bibpath = Pathname.new(@file).realpath.parent
      Pathname.new(file).realpath.relative_path_from(bibpath).to_s
    end

    # Yields every entry.
    def each(&block)
      @entries.each_value(&block)
    end

    # Writes the bibliography if it is dirty. Passing a file name switches
    # the target file and forces a write. The data is written to a temporary
    # file first which is then renamed over the target.
    #
    # Returns true when the file was written, false otherwise.
    # Raises RuntimeError when no file name is known.
    def save(file = nil)
      if file
        @file = file
        @dirty = true
      end

      raise 'No filename given' unless @file
      if @dirty
        @save_hooks.each {|hook| hook.call(self) }
        File.open("#{@file}.tmp", 'w') {|f| f.write(self) }
        File.rename("#{@file}.tmp", @file)
        @dirty = false
        true
      else
        false
      end
    end

    # Adds an entry under its key, replacing any entry with the same key.
    def <<(entry)
      entry.bibliography = self
      @entries[entry.key] = entry
      dirty!
    end

    # Parses BibTeX text and adds all entries found. Whitespace and
    # %-comments between entries are skipped. Entries without a key
    # (comments) are stored under a running number.
    def parse(text)
      until text.empty?
        case text
        when Ignorable
          text = $'
        else
          entry = Entry.new
          text = entry.parse(text)
          entry.key ||= @entries.size
          self << entry
        end
      end
    end

    # Serializes the bibliography, prefixed with a timestamp and an encoding
    # comment.
    def to_s
      "% #{DateTime.now}\n% Encoding: UTF8\n\n" <<
        @entries.values.join("\n") << "\n"
    end

    # Field value that is written without surrounding braces.
    class RawValue < String; end

    # A single BibTeX entry: type, key and a set of fields.
    class Entry
      include Enumerable

      attr_accessor :key, :type, :bibliography

      # Parses text into a new entry that is not part of a bibliography.
      def self.parse(text)
        entry = Entry.new
        entry.parse(text)
        entry
      end

      def initialize
        @fields = {}
      end

      # Stores the path of the attached file in the :file field, relative to
      # the bibliography and tagged with the upper-cased extension. A path
      # without extension gets an empty type instead of raising.
      #
      # Raises RuntimeError when the entry is not part of a bibliography.
      def file=(file)
        raise 'No bibliography set' unless bibliography
        extension = file[/\.(\w+)$/, 1]
        self[:file] = ":#{bibliography.relative_path(file)}:#{extension.to_s.upcase}" # JabRef file format "description:path:type"
        file
      end

      # Returns a hash with :name, :type and :path of the attached file, or
      # nil when the entry has no :file field or the field holds no path
      # (i.e. it is not in the "description:path:type" format).
      #
      # Raises RuntimeError when the entry is not part of a bibliography.
      def file
        if self[:file]
          raise 'No bibliography set' unless bibliography
          _description, file, type = self[:file].split(':', 3)
          return nil if file.nil? || file.empty?
          path = (Pathname.new(bibliography.file).realpath.parent + file).to_s
          { :name => File.basename(path), :type => type.to_s.upcase.to_sym, :path => path }
        end
      end

      # Returns the field value; keys are case-insensitive.
      def [](key)
        @fields[convert_key(key)]
      end

      # Sets a field to the stripped string form of value (keeping RawValue
      # as RawValue). A nil or false value deletes the field. The
      # bibliography is only marked dirty when something changed.
      def []=(key, value)
        if value then
          key = convert_key(key)
          value = RawValue === value ? RawValue.new(value.to_s.strip) : value.to_s.strip
          if @fields[key] != value || @fields[key].class != value.class
            @fields[key] = value
            dirty!
          end
        else
          delete(key)
        end
      end

      # Removes a field if present.
      def delete(key)
        key = convert_key(key)
        if @fields.include?(key)
          @fields.delete(key)
          dirty!
        end
      end

      # Yields field name and value pairs.
      def each(&block)
        @fields.each(&block)
      end

      # True for @comment entries.
      def comment?
        type.to_s.downcase == 'comment'
      end

      # Marks the owning bibliography (if any) as modified.
      def dirty!
        bibliography.dirty! if bibliography
      end

      # Serializes the entry in BibTeX syntax.
      def to_s
        s = "@#{type}{"
        if comment?
          s << self[:comment]
        else
          s << "#{key},\n" << to_a.map {|k,v| RawValue === v ? " #{k} = #{v}" : " #{k} = {#{v}}" }.join(",\n") << "\n"
        end
        s << "}\n"
      end

      # Parses one entry from the beginning of text.
      #
      # Returns the text that follows the entry.
      # Raises RuntimeError on malformed input.
      def parse(text)
        raise 'Unexpected token' if text !~ /\A\s*@(\w+)\s*\{/
        self.type = $1
        text = $'

        if comment?
          text, self[:comment] = parse_field(text)
        else
          raise 'Expected entry key' if text !~ /([^,]+),\s*/
          self.key = $1.strip
          text = $'

          until text.empty?
            case text
            when Ignorable
              text = $'
            when /\A\s*(\w+)\s*=\s*/
              text, key = $', $1
              if text =~ /\A\{/
                text, self[key] = parse_field(text)
              else
                # Unbraced values (numbers, macros) are kept raw.
                text, value = parse_field(text)
                self[key] = RawValue.new(value)
              end
            else
              break
            end
          end
        end

        raise 'Expected closing }' unless text =~ /\A\s*\}/
        $'
      end

      private

      # Reads one field value from the beginning of text, honouring nested
      # braces. The outermost braces are dropped; a comma at nesting depth 0
      # ends the value and is consumed, a closing brace at depth 0 ends the
      # value and is left in place.
      #
      # Returns the remaining text and the value.
      def parse_field(text)
        value = ''
        count = 0
        until text.empty?
          case text
          when /\A\{/
            text = $'
            value << $& if count > 0
            count += 1
          when /\A\}/
            break if count == 0
            count -= 1
            text = $'
            value << $& if count > 0
          when /\A,/
            text = $'
            break if count == 0
            value << $&
          when /\A[^\}\{,]+/
            text = $'
            value << $&
          else
            break
          end
        end

        raise 'Expected closing }' if count != 0

        return text, value
      end

      # Field names are stored as lower-case symbols.
      def convert_key(key)
        key.to_s.downcase.to_sym
      end
    end
  end
end
@@ -0,0 +1,107 @@
1
+ require 'bibsync'
2
+ require 'optparse'
3
+
4
module BibSync
  # Command-line front end: parses the options, builds the list of actions
  # and runs them.
  class Command
    # Provides `error`, which `process` calls. Without this include the
    # "too many arguments" path failed with NoMethodError.
    include Log

    # args - the command line arguments (consumed while parsing).
    def initialize(args)
      @args = args
      @options = {}
    end

    # Parses the arguments, runs the requested actions and exits the
    # process: status 0 on success, 1 on any error.
    def run
      @opts = OptionParser.new(&method(:set_opts))
      @opts.parse!(@args)
      process
      exit 0
    rescue Exception => ex
      # Top-level handler of the executable: everything is reported as a
      # short message unless --trace is given. SystemExit is always passed
      # on so that the `exit` calls keep their status.
      raise ex if Log.trace? || SystemExit === ex
      $stderr.print "#{ex.class}: " if ex.class != RuntimeError
      $stderr.puts ex.message
      $stderr.puts ' Use --trace for backtrace.'
      exit 1
    end

    private

    # Declares all command line options on the given OptionParser.
    def set_opts(opts)
      opts.banner = "Usage: #{$0} [options]"

      opts.on('-b', '--bib bibfile.bib', 'Set bibliography') do |bib|
        @options[:bib] = bib
      end

      opts.on('-d', '--directory directory', 'Set directory') do |dir|
        @options[:dir] = dir
      end

      opts.on('-v', '--check-versions', 'Check for updated arXiv papers') do
        @options[:check_versions] = true
      end

      opts.on('-u', '--update', 'Update arXiv papers') do
        @options[:update] = true
      end

      opts.on('-s', '--sync', 'Synchronize missing metadata') do
        @options[:sync] = true
      end

      opts.on('-r', '--resync', 'Force synchronization with arXiv and DOI') do
        @options[:resync] = true
      end

      opts.on('-m', '--citedbyme directory', 'Find my citations in my TeX files') do |dir|
        @options[:citedbyme] = dir
      end

      opts.on('-j', '--jabref', 'Format bibliography file with JabRef') do
        @options[:jabref] = true
      end

      # May be given several times; the URLs are collected in a list.
      opts.on('-f', '--fetch url', 'Fetch arXiv paper as PDF file') do |url|
        (@options[:fetch] ||= []) << url
      end

      opts.on('-V', '--verbose', 'Verbose output') do
        Log.verbose!
      end

      opts.on('--trace', 'Show a full traceback on error') do
        Log.trace!
      end

      opts.on('-h', '--help', 'Display this help') do
        puts opts
        exit
      end

      opts.on('--version', 'Display version information') do
        puts "BibSync Version #{BibSync::VERSION}"
        exit
      end
    end

    # Turns the parsed options into actions and runs them in a fixed order.
    # All actions are instantiated (and validate their options) before the
    # first one runs.
    def process
      if @args.size != 0
        error 'Too many arguments'
        puts @opts
        # A usage error is a failure; the original exited with status 0.
        exit 1
      end

      if @options[:bib]
        @options[:bib] = Bibliography.new(@options[:bib])
        @options[:bib].save_hook(Transformer.new)
      end

      actions = []
      actions << :FetchFromArXiv if @options[:fetch]
      actions << :CheckVersions if @options[:check_versions] || @options[:update]
      actions << :SynchronizeFiles << :SynchronizeMetadata if @options[:sync] || @options[:resync]
      actions << :FindMyCitations if @options[:citedbyme]
      actions << :Validate
      actions << :JabrefFormat if @options[:jabref]

      actions.map {|a| Actions.const_get(a).new(@options) }.each {|a| a.run }
    end
  end
end
@@ -0,0 +1,59 @@
1
module BibSync
  # Console logging mixin with optional ANSI colors. The verbose and trace
  # switches are global and stored on the module itself.
  module Log
    # ANSI escape sequences
    Reset = "\e[0m"
    Red = "\e[31m"
    Yellow = "\e[33m"
    Blue = "\e[36m"

    class << self
      # Whether debug messages are printed.
      def verbose?
        @verbose
      end

      # Enables debug messages.
      def verbose!
        @verbose = true
      end

      # Whether backtraces of logged exceptions are printed.
      def trace?
        @trace
      end

      # Enables backtraces.
      def trace!
        @trace = true
      end
    end

    # Prints the message like info, but only in verbose mode.
    def debug(message, opts = {})
      return unless Log.verbose?
      info(message, opts)
    end

    # Prints an uncolored message.
    def info(message, opts = {})
      log(message, opts)
    end

    # Prints a blue message.
    def notice(message, opts = {})
      log(message, opts.merge(:color => Blue))
    end

    # Prints a yellow message.
    def warning(message, opts = {})
      log(message, opts.merge(:color => Yellow))
    end

    # Prints a red message.
    def error(message, opts = {})
      log(message, opts.merge(:color => Red))
    end

    # Prints a message to standard output.
    #
    # opts - :ex    exception whose message is appended (and whose backtrace
    #               is printed in trace mode)
    #        :color escape sequence wrapped around the message
    #        :key   prefix; objects responding to `key` contribute their key
    def log(message, opts = {})
      exception = opts[:ex]
      message = "#{message} - #{exception.message}" if exception

      color = opts[:color]
      message = "#{color}#{message}#{Reset}" if color

      prefix = opts[:key]
      if prefix
        prefix = prefix.key if prefix.respond_to?(:key)
        message = "#{prefix} : #{message}"
      end

      puts(message)
      puts(exception.backtrace.join("\n")) if Log.trace? && exception
    end
  end
end
@@ -0,0 +1,69 @@
1
module BibSync
  # Save hook that normalizes the fields of all entries: author lists,
  # APS publisher data, month names and journal references.
  class Transformer
    include Utils

    # Normalizes every non-comment entry of the bibliography.
    def call(bib)
      bib.each do |entry|
        normalize(entry) unless entry.comment?
      end
    end

    private

    # Applies all normalizations to one entry, in a fixed order.
    def normalize(entry)
      author = entry[:author]
      entry[:author] = author.gsub(/\{(\w+)\}/, '\\1').gsub(/#/, ' and ') if author

      doi = entry[:doi]
      if doi && doi =~ /(PhysRev|RevModPhys).*?\.(\d+)$/
        # The trailing number of an APS DOI is used as page number.
        page = $2
        entry[:publisher] ||= 'American Physical Society'
        entry[:pages] ||= page
      end

      publisher = entry[:publisher]
      if publisher && publisher =~ /American Physical Society/i
        entry[:publisher] = 'American Physical Society'
      end

      month = entry[:month]
      entry[:month] = Bibliography::RawValue.new(month.downcase) if month

      normalize_journal(entry) if entry[:journal]
    end

    # Splits free-form journal references into year, pages and volume,
    # replaces them with the canonical journal name and derives the short
    # journal name.
    def normalize_journal(entry)
      if entry[:journal] =~ /EPL/
        extract_reference(entry, / (\d{5,10})( |\Z)/, / (\d{2,4})( |\Z)/)
        entry[:journal] = 'Europhysics Letters'
      end

      if entry[:journal] =~ /(Phys\.|Physical) (Rev\.|Review) Lett[^ ]+ /
        extract_reference(entry, / (\d{5,10})( |,|\Z)/, / (\d{2,4})( |,|\Z)/)
        entry[:journal] = 'Physical Review Letters'
      end

      if entry[:journal] =~ /(Phys\.|Physical) (Rev\.|Review) (\w) /
        letter = $3
        extract_reference(entry, / (\d{5,10})( |,|\Z)/, / (\d{2,4})( |,|\Z)/)
        entry[:journal] = "Physical Review #{letter}"
      end

      entry[:shortjournal] =
        case entry[:journal]
        when /\APhysical Review (\w)\Z/i then "PR#{$1.upcase}"
        when /\APhysical Review Letters\Z/i then 'PRL'
        when /\AReviews of Modern Physics\Z/i then 'RMP'
        when /\ANew Journal of Physics\Z/i then 'NJP'
        when /\A#{ArXivJournal}\Z/i then 'arXiv'
        when /\AEurophysics Letters\Z/i then 'EPL'
        else entry[:journal]
        end
    end

    # Copies the year in parentheses and the numbers matched by the given
    # patterns from the journal reference into :year, :pages and :volume.
    def extract_reference(entry, pages_pattern, volume_pattern)
      reference = entry[:journal]
      entry[:year] = $1 if reference =~ /\((\d{4})\)/
      entry[:pages] = $1 if reference =~ pages_pattern
      entry[:volume] = $1 if reference =~ volume_pattern
    end
  end
end
@@ -0,0 +1,45 @@
1
module BibSync
  # Helpers shared by the actions: file name handling, downloads through
  # curl and arXiv identifier manipulation.
  module Utils
    # Journal name assigned to papers that are only available on arXiv.
    ArXivJournal = 'ArXiv e-prints'

    # Splits a file name at its last extension.
    #
    # Returns the base name and the upper-cased extension. For a name
    # without extension the name itself and nil are returned (this used to
    # fail with NoMethodError).
    def split_filename(file)
      match = /\A(.*?)\.(\w+)\z/.match(file)
      return file, nil unless match
      return match[1], match[2].upcase
    end

    # Downloads url with curl, following redirects.
    #
    # headers - Hash of additional request headers.
    #
    # Returns the response body.
    # Raises RuntimeError carrying curl's output when curl fails.
    def fetch(url, headers = {})
      # open(url, headers) {|f| f.read }
      headers = headers.map {|k,v| '-H ' + Shellwords.escape("#{k}: #{v}") }.join(' ')
      result = `curl --stderr - -S -s -L #{headers} #{Shellwords.escape url}`
      raise result.chomp if $? != 0
      result
    end

    # Downloads the PDF of the arXiv paper id into dir. The file is named
    # after the identifier including its version, without category prefix.
    #
    # Raises RuntimeError carrying curl's output when curl fails.
    def arxiv_download(dir, id)
      url = "http://arxiv.org/pdf/#{id}"
      file = File.join(dir, "#{arxiv_id(id, :version => true, :prefix => false)}.pdf")
      result = `curl --stderr - -S -s -L -o #{Shellwords.escape file} #{Shellwords.escape url}`
      raise result.chomp if $? != 0
    end

    # Downloads url and parses it as XML with all namespaces removed.
    def fetch_xml(url, headers = {})
      xml = Nokogiri::XML(fetch(url, headers))
      xml.remove_namespaces!
      xml
    end

    # Downloads url and parses it as HTML.
    def fetch_html(url, headers = {})
      Nokogiri::HTML(fetch(url, headers))
    end

    # Normalizes an arXiv identifier.
    #
    # arxiv - identifier string or a Bibliography::Entry (its :arxiv field
    #         is used)
    # opts  - both keys are mandatory:
    #         :prefix  keep the part up to the last slash (old-style
    #                  identifiers such as quant-ph/0301001)
    #         :version keep the trailing version (v2)
    #
    # Returns the identifier, or nil when there is none.
    # Raises ArgumentError when an option is missing.
    def arxiv_id(arxiv, opts = {})
      unless opts.include?(:prefix) && opts.include?(:version)
        raise ArgumentError, 'arxiv_id requires the :prefix and :version options'
      end
      arxiv = arxiv[:arxiv] if Bibliography::Entry === arxiv
      if arxiv
        arxiv = arxiv.sub(/^.*\//, '') unless opts[:prefix]
        arxiv = arxiv.sub(/v\d+$/, '') unless opts[:version]
        arxiv
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
# Top-level namespace of the bibsync gem.
module BibSync
  # Release version; read by the gemspec and printed by `--version`.
  VERSION = '0.0.1'
end
data/lib/bibsync.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'nokogiri'
2
+ require 'shellwords'
3
+ require 'date'
4
+ require 'pathname'
5
+ require 'bibsync/version'
6
+ require 'bibsync/utils'
7
+ require 'bibsync/transformer'
8
+ require 'bibsync/bibliography'
9
+ require 'bibsync/log'
10
+ require 'bibsync/actions'
metadata ADDED
@@ -0,0 +1,82 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bibsync
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Daniel Mendler
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-04-04 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: BibSync is a tool to synchronize scientific papers and bibtex bibliography
31
+ files
32
+ email:
33
+ - mail@daniel-mendler.de
34
+ executables:
35
+ - bibsync
36
+ extensions: []
37
+ extra_rdoc_files: []
38
+ files:
39
+ - .gitignore
40
+ - bibsync.gemspec
41
+ - bin/bibsync
42
+ - lib/bibsync.rb
43
+ - lib/bibsync/actions.rb
44
+ - lib/bibsync/actions/check_versions.rb
45
+ - lib/bibsync/actions/fetch_from_arxiv.rb
46
+ - lib/bibsync/actions/find_my_citations.rb
47
+ - lib/bibsync/actions/jabref_format.rb
48
+ - lib/bibsync/actions/synchronize_files.rb
49
+ - lib/bibsync/actions/synchronize_metadata.rb
50
+ - lib/bibsync/actions/validate.rb
51
+ - lib/bibsync/bibliography.rb
52
+ - lib/bibsync/command.rb
53
+ - lib/bibsync/log.rb
54
+ - lib/bibsync/transformer.rb
55
+ - lib/bibsync/utils.rb
56
+ - lib/bibsync/version.rb
57
+ homepage: https://github.com/minad/bibsync
58
+ licenses: []
59
+ post_install_message:
60
+ rdoc_options: []
61
+ require_paths:
62
+ - lib
63
+ required_ruby_version: !ruby/object:Gem::Requirement
64
+ none: false
65
+ requirements:
66
+ - - ! '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ required_rubygems_version: !ruby/object:Gem::Requirement
70
+ none: false
71
+ requirements:
72
+ - - ! '>='
73
+ - !ruby/object:Gem::Version
74
+ version: '0'
75
+ requirements: []
76
+ rubyforge_project: bibsync
77
+ rubygems_version: 1.8.24
78
+ signing_key:
79
+ specification_version: 3
80
+ summary: BibSync is a tool to synchronize scientific papers and bibtex bibliography
81
+ files
82
+ test_files: []