bibsync 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.travis.yml +10 -0
- data/Gemfile +3 -0
- data/LICENSE +21 -0
- data/README.md +88 -0
- data/Rakefile +16 -0
- data/bibsync.gemspec +4 -2
- data/lib/bibsync/actions/{check_versions.rb → check_arxiv_versions.rb} +6 -6
- data/lib/bibsync/actions/determine_arxiv_doi.rb +70 -0
- data/lib/bibsync/actions/fetch_from_arxiv.rb +11 -9
- data/lib/bibsync/actions/find_my_citations.rb +4 -4
- data/lib/bibsync/actions/jabref_format.rb +2 -2
- data/lib/bibsync/actions/synchronize_files.rb +5 -6
- data/lib/bibsync/actions/synchronize_metadata.rb +14 -57
- data/lib/bibsync/actions/validate.rb +16 -6
- data/lib/bibsync/actions.rb +1 -7
- data/lib/bibsync/bibliography.rb +60 -23
- data/lib/bibsync/command.rb +13 -8
- data/lib/bibsync/log.rb +22 -20
- data/lib/bibsync/transformer.rb +1 -1
- data/lib/bibsync/utils.rb +7 -9
- data/lib/bibsync/version.rb +1 -1
- data/test/actions/test_check_arxiv_versions.rb +4 -0
- data/test/actions/test_determine_arxiv_doi.rb +61 -0
- data/test/actions/test_fetch_from_arxiv.rb +4 -0
- data/test/actions/test_find_my_citations.rb +4 -0
- data/test/actions/test_jabref_format.rb +4 -0
- data/test/actions/test_synchronize_files.rb +4 -0
- data/test/actions/test_synchronize_metadata.rb +34 -0
- data/test/actions/test_validate.rb +4 -0
- data/test/fixture/FileWithEmbeddedArXiv.pdf +0 -0
- data/test/fixture/FileWithEmbeddedArXiv.tex +7 -0
- data/test/fixture/FileWithEmbeddedDOI.pdf +0 -0
- data/test/fixture/FileWithEmbeddedDOI.tex +7 -0
- data/test/fixture/entry.bib +8 -0
- data/test/fixture/test.bib +34 -0
- data/test/helper.rb +21 -0
- data/test/test_bibliography.rb +222 -0
- data/test/test_utils.rb +54 -0
- metadata +63 -16
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 21c0564e45a66b0339bf5b7bafaef25633adcd8e
|
4
|
+
data.tar.gz: 34a9ae41d395ba912e95accbb0c2cb87c8de69c3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 46e87958376899e94b3bec951241bb02e3085f2e5141146477d9f8bb0de533e27c0c8d07b4c4e5a314f2d734cd386ee092362d38307e18a4d49303a746ad42c0
|
7
|
+
data.tar.gz: 46b15a8cf96461ce3fa3e45aef1585e6806177be668a20410ca940687c5d1f5667d30b1e76701b37503984c0a5ca0aa23025728b0154ee306c868ba8bcdaf1db
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2013 Daniel Mendler
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
BibSync
|
2
|
+
=======
|
3
|
+
|
4
|
+
BibSync is a tool to synchronize your paper database with a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file, which might be most
|
5
|
+
useful for physicists and mathematicians since it supports synchronization with [DOI](http://dx.doi.org/) and [arXiv](http://arxiv.org/).
|
6
|
+
|
7
|
+
I created this tool while working on my diploma thesis in physics since I was unhappy
|
8
|
+
with existing tools like [Mendeley](http://www.mendeley.com/). I use this tool together with Git for version control
|
9
|
+
and [JabRef](http://jabref.sourceforge.net/) for browsing. This tool adheres more to the Unix philosophy that a small tool
|
10
|
+
for each task is better than one thing which tries to solve everything. If you use [JabRef](http://jabref.sourceforge.net/)
|
11
|
+
for browsing and tagging, it is unnecessary to sort the papers into different subdirectories by hand.
|
12
|
+
Just throw them all in one directory!
|
13
|
+
|
14
|
+
__Note__: This tool is derived from a script which I used during my thesis. It worked
|
15
|
+
quite well and reliably during that time. But be aware that I used Git for version control
|
16
|
+
of the [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file. So any mistakes which might be made by this tool could be reverted.
|
17
|
+
|
18
|
+
Features
|
19
|
+
--------
|
20
|
+
|
21
|
+
BibSync supports the following features:
|
22
|
+
|
23
|
+
* Synchronization between a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file and a directory containing the papers in pdf, ps or djvu format
|
24
|
+
* [JabRef](http://jabref.sourceforge.net/) file fields are generated, so you can open the existing papers directly out of [JabRef](http://jabref.sourceforge.net/)
|
25
|
+
* Downloading of [arXiv](http://arxiv.org/) or [DOI](http://dx.doi.org/) metadata
|
26
|
+
* Extraction of [arXiv](http://arxiv.org/) or [DOI](http://dx.doi.org/) id out of the file using [pdftotext](http://en.wikipedia.org/wiki/Pdftotext)
|
27
|
+
* Downloading of new versions of [arXiv](http://arxiv.org/) papers
|
28
|
+
* Simple validation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) files (checks for missing fields, etc.)
|
29
|
+
* Simple transformation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) fields (Normalization of author, year and journal field...)
|
30
|
+
* Works on every platform supporting Ruby (Linux, Windows, ...)
|
31
|
+
|
32
|
+
Quick start
|
33
|
+
-----------
|
34
|
+
|
35
|
+
BibSync requires Ruby >= 1.9.2 to run. It is distributed as a RubyGems package. You can install it via
|
36
|
+
the command line
|
37
|
+
|
38
|
+
~~~
|
39
|
+
$ gem install bibsync
|
40
|
+
~~~
|
41
|
+
|
42
|
+
After that you can use the 'bibsync' tool on the command line. First, let's validate
|
43
|
+
a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file called 'thesis.bib'.
|
44
|
+
|
45
|
+
~~~
|
46
|
+
$ bibsync -b ~/thesis/thesis.bib
|
47
|
+
~~~
|
48
|
+
|
49
|
+
Then we want to synchronize all the papers in our paper directory with 'bibsync' and automatically download
|
50
|
+
the missing metadata.
|
51
|
+
|
52
|
+
~~~
|
53
|
+
$ bibsync -d ~/thesis/papers -b ~/thesis/thesis.bib
|
54
|
+
~~~
|
55
|
+
|
56
|
+
BibSync tries to download the metadata from [arxiv.org](http://arxiv.org) and [dx.doi.org](http://dx.doi.org). If you want to know more about the functions of 'bibsync', take a look at the command-line help.
|
57
|
+
|
58
|
+
~~~
|
59
|
+
$ bibsync --help
|
60
|
+
~~~
|
61
|
+
|
62
|
+
My setup
|
63
|
+
--------
|
64
|
+
|
65
|
+
* BibSync for synchronizing
|
66
|
+
* [JabRef](http://jabref.sourceforge.net/) for browsing the bibliography, tagging and categorizing papers
|
67
|
+
* [Biblatex](http://www.ctan.org/pkg/biblatex) to include a bibliography in LaTeX with full Unicode support
|
68
|
+
|
69
|
+
Alternatives
|
70
|
+
------------
|
71
|
+
|
72
|
+
* [Mendeley](http://www.mendeley.com/) (Commercial, synchronizes with their server, limited disk space, bloated gui application)
|
73
|
+
* [Zotero](http://www.zotero.org/) (Firefox plugin, Open source)
|
74
|
+
|
75
|
+
A better name?
|
76
|
+
--------------
|
77
|
+
|
78
|
+
If you have a suggestion for a better name, just let me know...
|
79
|
+
|
80
|
+
Author
|
81
|
+
------
|
82
|
+
|
83
|
+
Daniel Mendler
|
84
|
+
|
85
|
+
License
|
86
|
+
-------
|
87
|
+
|
88
|
+
See LICENSE
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
begin
|
2
|
+
require 'bundler'
|
3
|
+
Bundler::GemHelper.install_tasks
|
4
|
+
rescue Exception
|
5
|
+
end
|
6
|
+
|
7
|
+
require 'rake/testtask'
|
8
|
+
|
9
|
+
Rake::TestTask.new :test do |t|
|
10
|
+
t.libs << 'lib' << 'test'
|
11
|
+
t.test_files = FileList['test/**/test_*.rb']
|
12
|
+
t.verbose = true
|
13
|
+
t.ruby_opts << '-w' << '-v'
|
14
|
+
end
|
15
|
+
|
16
|
+
task :default => :test
|
data/bibsync.gemspec
CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |s|
|
|
8
8
|
s.date = Date.today.to_s
|
9
9
|
s.authors = ['Daniel Mendler']
|
10
10
|
s.email = ['mail@daniel-mendler.de']
|
11
|
-
s.summary = 'BibSync is a tool to synchronize scientific papers and
|
12
|
-
s.description = 'BibSync is a tool to synchronize scientific papers and
|
11
|
+
s.summary = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
|
12
|
+
s.description = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
|
13
13
|
s.homepage = 'https://github.com/minad/bibsync'
|
14
14
|
s.rubyforge_project = s.name
|
15
15
|
|
@@ -18,4 +18,6 @@ Gem::Specification.new do |s|
|
|
18
18
|
s.require_paths = %w(lib)
|
19
19
|
|
20
20
|
s.add_runtime_dependency('nokogiri')
|
21
|
+
s.add_development_dependency('rake')
|
22
|
+
s.add_development_dependency('minitest')
|
21
23
|
end
|
@@ -1,14 +1,14 @@
|
|
1
1
|
module BibSync
|
2
2
|
module Actions
|
3
|
-
class
|
3
|
+
class CheckArXivVersions
|
4
4
|
include Log
|
5
5
|
include Utils
|
6
6
|
|
7
7
|
SliceSize = 20
|
8
8
|
|
9
9
|
def initialize(options)
|
10
|
-
raise '
|
11
|
-
raise '
|
10
|
+
raise 'Option :bib is required' unless @bib = options[:bib]
|
11
|
+
raise 'Option :dir is required' unless @dir = options[:dir]
|
12
12
|
@update = options[:update]
|
13
13
|
end
|
14
14
|
|
@@ -16,16 +16,16 @@ module BibSync
|
|
16
16
|
notice 'Check for newer version on arXiv'
|
17
17
|
@bib.select {|e| e[:arxiv] }.each_slice(SliceSize) do |entry|
|
18
18
|
begin
|
19
|
-
xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{entry.map{|e| arxiv_id(e, :
|
19
|
+
xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{entry.map{|e| arxiv_id(e, version: false, prefix: true) }.join(',')}&max_results=#{SliceSize}")
|
20
20
|
xml.xpath('//entry/id').map(&:content).each_with_index do |id, i|
|
21
21
|
id.gsub!('http://arxiv.org/abs/', '')
|
22
22
|
if id != entry[i][:arxiv]
|
23
|
-
info("#{entry[i][:arxiv]} replaced by http://arxiv.org/pdf/#{id}", :
|
23
|
+
info("#{entry[i][:arxiv]} replaced by http://arxiv.org/pdf/#{id}", key: entry[i])
|
24
24
|
arxiv_download(@dir, id) if @update
|
25
25
|
end
|
26
26
|
end
|
27
27
|
rescue => ex
|
28
|
-
error('arXiv query failed', :
|
28
|
+
error('arXiv query failed', ex: ex)
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module BibSync
|
2
|
+
module Actions
|
3
|
+
class DetermineArXivDOI
|
4
|
+
include Utils
|
5
|
+
include Log
|
6
|
+
|
7
|
+
def initialize(options)
|
8
|
+
raise 'Option :bib is required' unless @bib = options[:bib]
|
9
|
+
@force = options[:resync]
|
10
|
+
end
|
11
|
+
|
12
|
+
def run
|
13
|
+
notice 'Determine arXiv and DOI identifiers'
|
14
|
+
|
15
|
+
@bib.each do |entry|
|
16
|
+
next if entry.comment? ||
|
17
|
+
(entry[:doi] && entry[:arxiv]) ||
|
18
|
+
(!@force && entry[:title] && entry[:author] && entry[:year])
|
19
|
+
|
20
|
+
determine_arxiv_and_doi(entry)
|
21
|
+
|
22
|
+
@bib.save
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def determine_arxiv_and_doi(entry)
|
29
|
+
if file = entry.file
|
30
|
+
if file[:type] == :PDF && !entry[:arxiv] && !entry[:doi]
|
31
|
+
debug('Searching for arXiv or doi identifier in pdf file', key: entry)
|
32
|
+
text = `pdftotext -f 1 -l 2 #{Shellwords.escape file[:path]} - 2>/dev/null`
|
33
|
+
entry[:arxiv] = $1 if text =~ /arXiv:\s*([\w\.\/\-]+)/
|
34
|
+
entry[:doi] = $1 if text =~ /doi:\s*([\w\.\/\-]+)/i
|
35
|
+
end
|
36
|
+
|
37
|
+
if !entry[:arxiv] && file[:name] =~ /^(\d+.\d+v\d+)\.\w+$/
|
38
|
+
debug('Interpreting file name as arXiv identifier', key: entry)
|
39
|
+
entry[:arxiv] = $1
|
40
|
+
end
|
41
|
+
|
42
|
+
if !entry[:doi] && file[:name] =~ /^(PhysRev.*?|RevModPhys.*?)\.\w+$/
|
43
|
+
debug('Interpreting file name as doi identifier', key: entry)
|
44
|
+
entry[:doi] = "10.1103/#{$1}"
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
if !entry[:arxiv] && entry[:doi]
|
49
|
+
begin
|
50
|
+
info('Fetch missing arXiv identifier', key: entry)
|
51
|
+
xml = fetch_xml("http://export.arxiv.org/api/query?search_query=doi:#{entry[:doi]}&max_results=1")
|
52
|
+
if xml.xpath('//entry/doi').map(&:content).first == entry[:doi]
|
53
|
+
id = xml.xpath('//entry/id').map(&:content).first
|
54
|
+
if id =~ %r{\Ahttp://arxiv.org/abs/(.+)\Z}
|
55
|
+
entry[:arxiv] = $1
|
56
|
+
end
|
57
|
+
end
|
58
|
+
rescue => ex
|
59
|
+
error('arXiv query by DOI failed', ex: ex, key: entry)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
unless entry[:arxiv] || entry[:doi]
|
64
|
+
warning('No arXiv or DOI identifier found', key: entry)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
@@ -7,17 +7,19 @@ module BibSync
|
|
7
7
|
include Utils
|
8
8
|
|
9
9
|
def initialize(options)
|
10
|
-
raise '
|
11
|
-
raise '
|
10
|
+
raise 'Option :fetch is required' unless @fetch = options[:fetch]
|
11
|
+
raise 'Option :dir is required' unless @dir = options[:dir]
|
12
12
|
end
|
13
13
|
|
14
14
|
def run
|
15
|
-
|
15
|
+
arxivs = []
|
16
16
|
urls = []
|
17
17
|
|
18
18
|
@fetch.each do |url|
|
19
|
-
if url =~
|
20
|
-
|
19
|
+
if url =~ /\A(\d+\.\d+)(v\d+)?\Z/
|
20
|
+
arxivs << $1
|
21
|
+
elsif url =~ %r{\Ahttp://arxiv.org/abs/(\d+\.\d+)\Z}
|
22
|
+
arxivs << $1
|
21
23
|
else
|
22
24
|
urls << url
|
23
25
|
end
|
@@ -31,18 +33,18 @@ module BibSync
|
|
31
33
|
end
|
32
34
|
end
|
33
35
|
|
34
|
-
unless
|
36
|
+
unless arxivs.empty?
|
35
37
|
notice 'Downloading from arXiv'
|
36
|
-
|
38
|
+
arxivs.each_slice(SliceSize) do |ids|
|
37
39
|
begin
|
38
40
|
xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{ids.join(',')}&max_results=#{SliceSize}")
|
39
41
|
xml.xpath('//entry/id').map(&:content).each_with_index do |id, i|
|
40
42
|
id.gsub!('http://arxiv.org/abs/', '')
|
41
|
-
info 'arXiv download', :
|
43
|
+
info 'arXiv download', key: id
|
42
44
|
arxiv_download(@dir, id)
|
43
45
|
end
|
44
46
|
rescue => ex
|
45
|
-
error('arXiv query failed', :
|
47
|
+
error('arXiv query failed', ex: ex)
|
46
48
|
end
|
47
49
|
end
|
48
50
|
end
|
@@ -5,8 +5,8 @@ module BibSync
|
|
5
5
|
include Utils
|
6
6
|
|
7
7
|
def initialize(options)
|
8
|
-
raise '
|
9
|
-
raise '
|
8
|
+
raise 'Option :bib is required' unless @bib = options[:bib]
|
9
|
+
raise 'Option :citedbyme is required' unless @dir = options[:citedbyme]
|
10
10
|
raise "#{@dir} is not a directory" unless File.directory?(@dir)
|
11
11
|
end
|
12
12
|
|
@@ -19,7 +19,7 @@ module BibSync
|
|
19
19
|
$1.split(/\s*,\s*/).each do |key|
|
20
20
|
key.strip!
|
21
21
|
file = @bib.relative_path(file)
|
22
|
-
debug("Cited in #{file}", :
|
22
|
+
debug("Cited in #{file}", key: key)
|
23
23
|
(cites[key] ||= []) << file
|
24
24
|
end
|
25
25
|
end
|
@@ -35,7 +35,7 @@ module BibSync
|
|
35
35
|
if @bib[key]
|
36
36
|
@bib[key][:citedbyme] = files
|
37
37
|
else
|
38
|
-
warning("Cited in #{files} but not found in #{@bib.file}", :
|
38
|
+
warning("Cited in #{files} but not found in #{@bib.file}", key: key)
|
39
39
|
end
|
40
40
|
end
|
41
41
|
|
@@ -1,11 +1,11 @@
|
|
1
1
|
module BibSync
|
2
2
|
module Actions
|
3
|
-
class
|
3
|
+
class JabRefFormat
|
4
4
|
include Utils
|
5
5
|
include Log
|
6
6
|
|
7
7
|
def initialize(options)
|
8
|
-
raise '
|
8
|
+
raise 'Option :bib is required' unless @bib = options[:bib]
|
9
9
|
end
|
10
10
|
|
11
11
|
def run
|
@@ -7,8 +7,8 @@ module BibSync
|
|
7
7
|
FileTypes = %w(djvu pdf ps)
|
8
8
|
|
9
9
|
def initialize(options)
|
10
|
-
raise '
|
11
|
-
raise '
|
10
|
+
raise 'Option :bib is required' unless @bib = options[:bib]
|
11
|
+
raise 'Option :dir is required' unless @dir = options[:dir]
|
12
12
|
end
|
13
13
|
|
14
14
|
def run
|
@@ -17,16 +17,15 @@ module BibSync
|
|
17
17
|
files = {}
|
18
18
|
Dir[File.join(@dir, "**/*.{#{FileTypes.join(',')}}")].sort.each do |file|
|
19
19
|
name = File.basename(file)
|
20
|
-
key
|
20
|
+
key = name_without_ext(name)
|
21
21
|
raise "Duplicate file #{name}" if files[key]
|
22
22
|
files[key] = file
|
23
23
|
end
|
24
24
|
|
25
25
|
files.each do |key, file|
|
26
26
|
unless entry = @bib[key]
|
27
|
-
info('New file', :
|
28
|
-
entry = Bibliography::Entry.new
|
29
|
-
entry.key = key
|
27
|
+
info('New file', key: key)
|
28
|
+
entry = Bibliography::Entry.new(key: key)
|
30
29
|
@bib << entry
|
31
30
|
end
|
32
31
|
|
@@ -5,7 +5,7 @@ module BibSync
|
|
5
5
|
include Log
|
6
6
|
|
7
7
|
def initialize(options)
|
8
|
-
raise '
|
8
|
+
raise 'Option :bib is required' unless @bib = options[:bib]
|
9
9
|
@force = options[:resync]
|
10
10
|
end
|
11
11
|
|
@@ -16,10 +16,8 @@ module BibSync
|
|
16
16
|
next if entry.comment?
|
17
17
|
|
18
18
|
if @force || !(entry[:title] && entry[:author] && entry[:year])
|
19
|
-
determine_arxiv_and_doi(entry)
|
20
|
-
|
21
19
|
if entry[:arxiv]
|
22
|
-
if entry.key == arxiv_id(entry, :
|
20
|
+
if entry.key == arxiv_id(entry, prefix: false, version: true)
|
23
21
|
entry = rename_arxiv_file(entry)
|
24
22
|
next unless entry
|
25
23
|
end
|
@@ -40,28 +38,28 @@ module BibSync
|
|
40
38
|
private
|
41
39
|
|
42
40
|
def update_aps_abstract(entry)
|
43
|
-
info("Downloading APS abstract", :
|
41
|
+
info("Downloading APS abstract", key: entry)
|
44
42
|
html = fetch_html("http://link.aps.org/doi/#{entry[:doi]}")
|
45
43
|
entry[:abstract] = html.css('.aps-abstractbox').map(&:content).first
|
46
44
|
rescue => ex
|
47
|
-
error('Abstract download failed', :
|
45
|
+
error('Abstract download failed', key: entry, ex: ex)
|
48
46
|
end
|
49
47
|
|
50
48
|
def update_doi(entry)
|
51
|
-
info('Downloading
|
49
|
+
info('Downloading DOI metadata', key: entry)
|
52
50
|
text = fetch("http://dx.doi.org/#{entry[:doi]}", 'Accept' => 'text/bibliography; style=bibtex')
|
53
51
|
raise text if text == 'Unknown DOI'
|
54
52
|
Bibliography::Entry.parse(text).each {|k, v| entry[k] = v }
|
55
53
|
rescue => ex
|
56
54
|
entry.delete(:doi)
|
57
|
-
error('
|
55
|
+
error('DOI download failed', key: entry, ex: ex)
|
58
56
|
end
|
59
57
|
|
60
58
|
# Rename arxiv file if key contains version
|
61
59
|
def rename_arxiv_file(entry)
|
62
60
|
file = entry.file
|
63
61
|
|
64
|
-
key = arxiv_id(entry, :
|
62
|
+
key = arxiv_id(entry, prefix: false, version: false)
|
65
63
|
|
66
64
|
if old_entry = @bib[key]
|
67
65
|
# Existing entry found
|
@@ -71,7 +69,7 @@ module BibSync
|
|
71
69
|
entry[:arxiv] =~ /v(\d+)$/
|
72
70
|
new_version = $1
|
73
71
|
if old_version && new_version && old_version >= new_version
|
74
|
-
info('Not updating existing entry with older version', :
|
72
|
+
info('Not updating existing entry with older version', key: old_entry)
|
75
73
|
File.delete(file[:path]) if file
|
76
74
|
return nil
|
77
75
|
end
|
@@ -79,14 +77,14 @@ module BibSync
|
|
79
77
|
old_entry[:arxiv] = entry[:arxiv]
|
80
78
|
old_entry[:doi] = entry[:doi]
|
81
79
|
entry = old_entry
|
82
|
-
info('Updating existing entry', :
|
80
|
+
info('Updating existing entry', key: entry)
|
83
81
|
else
|
84
82
|
# This is a new entry
|
85
83
|
entry.key = key
|
86
84
|
end
|
87
85
|
|
88
86
|
if file
|
89
|
-
new_path = file[:path].sub(arxiv_id(entry, :
|
87
|
+
new_path = file[:path].sub(arxiv_id(entry, prefix: false, version: true), key)
|
90
88
|
File.rename(file[:path], new_path)
|
91
89
|
entry.file = new_path
|
92
90
|
end
|
@@ -97,8 +95,8 @@ module BibSync
|
|
97
95
|
end
|
98
96
|
|
99
97
|
def update_arxiv(entry)
|
100
|
-
info('Downloading arXiv metadata', :
|
101
|
-
xml = fetch_xml("http://export.arxiv.org/oai2?verb=GetRecord&identifier=oai:arXiv.org:#{arxiv_id(entry, :
|
98
|
+
info('Downloading arXiv metadata', key: entry)
|
99
|
+
xml = fetch_xml("http://export.arxiv.org/oai2?verb=GetRecord&identifier=oai:arXiv.org:#{arxiv_id(entry, prefix: true, version: false)}&metadataPrefix=arXiv")
|
102
100
|
error = xml.xpath('//error').map(&:content).first
|
103
101
|
raise error if error
|
104
102
|
|
@@ -108,7 +106,7 @@ module BibSync
|
|
108
106
|
entry[:author] = xml.xpath('//arXiv/authors/author').map do |author|
|
109
107
|
"{#{author.xpath('keyname').map(&:content).first}}, {#{author.xpath('forenames').map(&:content).first}}"
|
110
108
|
end.join(' and ')
|
111
|
-
entry[:journal] =
|
109
|
+
entry[:journal] = 'ArXiv e-prints'
|
112
110
|
entry[:eprint] = entry[:arxiv]
|
113
111
|
entry[:archiveprefix] = 'arXiv'
|
114
112
|
date = xml.xpath('//arXiv/updated').map(&:content).first || xml.xpath('//arXiv/created').map(&:content).first
|
@@ -124,49 +122,8 @@ module BibSync
|
|
124
122
|
entry[:url] = "http://arxiv.org/abs/#{entry[:arxiv]}"
|
125
123
|
rescue => ex
|
126
124
|
entry.delete(:arxiv)
|
127
|
-
error('arXiv download failed', :
|
125
|
+
error('arXiv download failed', key: entry, ex: ex)
|
128
126
|
end
|
129
|
-
|
130
|
-
def determine_arxiv_and_doi(entry)
|
131
|
-
if file = entry.file
|
132
|
-
if file[:type] == :PDF && !entry[:arxiv] && !entry[:doi]
|
133
|
-
debug('Searching for arXiv or doi identifier in pdf file', :key => entry)
|
134
|
-
text = `pdftotext -f 1 -l 2 #{Shellwords.escape file[:path]} - 2>/dev/null`
|
135
|
-
entry[:arxiv] = $1 if text =~ /arXiv:\s*([\w\.\/\-]+)/
|
136
|
-
entry[:doi] = $1 if text =~ /doi:\s*([\w\.\/\-]+)/i
|
137
|
-
end
|
138
|
-
|
139
|
-
if !entry[:arxiv] && file[:name] =~ /^(\d+.\d+v\d+)\.\w+$/
|
140
|
-
debug('Interpreting file name as arXiv identifier', :key => entry)
|
141
|
-
entry[:arxiv] = $1
|
142
|
-
end
|
143
|
-
|
144
|
-
if !entry[:doi] && file[:name] =~ /^(PhysRev.*?|RevModPhys.*?)\.\w+$/
|
145
|
-
debug('Interpreting file name as doi identifier', :key => entry)
|
146
|
-
entry[:doi] = "10.1103/#{$1}"
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
if !entry[:arxiv] && entry[:doi]
|
151
|
-
begin
|
152
|
-
info('Fetch missing arXiv identifier', :key => entry)
|
153
|
-
xml = fetch_xml("http://export.arxiv.org/api/query?search_query=doi:#{entry[:doi]}&max_results=1")
|
154
|
-
if xml.xpath('//entry/doi').map(&:content).first == entry[:doi]
|
155
|
-
id = xml.xpath('//entry/id').map(&:content).first
|
156
|
-
if id =~ %r{\Ahttp://arxiv.org/abs/(.+)\Z}
|
157
|
-
entry[:arxiv] = $1
|
158
|
-
end
|
159
|
-
end
|
160
|
-
rescue => ex
|
161
|
-
error('arXiv doi query failed', :ex => ex, :key => entry)
|
162
|
-
end
|
163
|
-
end
|
164
|
-
|
165
|
-
unless entry[:arxiv] || entry[:doi]
|
166
|
-
warning('No arXiv or doi identifier found', :key => entry)
|
167
|
-
end
|
168
|
-
end
|
169
|
-
|
170
127
|
end
|
171
128
|
end
|
172
129
|
end
|
@@ -10,7 +10,7 @@ module BibSync
|
|
10
10
|
|
11
11
|
def run
|
12
12
|
notice 'Check validity'
|
13
|
-
titles, arxivs = {}, {}
|
13
|
+
titles, arxivs, dois = {}, {}, {}
|
14
14
|
|
15
15
|
@bib.each do |entry|
|
16
16
|
next if entry.comment?
|
@@ -18,14 +18,16 @@ module BibSync
|
|
18
18
|
w = []
|
19
19
|
|
20
20
|
file = entry.file
|
21
|
-
w << 'Missing file' unless file && File.file?(file[:path])
|
22
21
|
|
23
|
-
|
22
|
+
missing = []
|
23
|
+
missing << :file unless file && File.file?(file[:path])
|
24
|
+
missing += [:title, :author, :year, :abstract].reject {|k| entry[k] }
|
25
|
+
w << "Missing #{missing.map(&:to_s).sort.join(', ')}" unless missing.empty?
|
24
26
|
|
25
|
-
w << '
|
27
|
+
w << 'File name does not match entry key' if name_without_ext(file[:name]) != entry.key if file
|
26
28
|
|
27
29
|
if entry[:arxiv]
|
28
|
-
id = arxiv_id(entry, :
|
30
|
+
id = arxiv_id(entry, version: false, prefix: true)
|
29
31
|
if arxivs.include?(id)
|
30
32
|
w << "ArXiv duplicate of '#{arxivs[id]}'"
|
31
33
|
else
|
@@ -33,6 +35,14 @@ module BibSync
|
|
33
35
|
end
|
34
36
|
end
|
35
37
|
|
38
|
+
if id = entry[:doi]
|
39
|
+
if dois.include?(id)
|
40
|
+
w << "DOI duplicate of '#{dois[id]}'"
|
41
|
+
else
|
42
|
+
dois[id] = entry.key
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
36
46
|
if entry[:title]
|
37
47
|
if titles.include?(entry[:title])
|
38
48
|
w << "Title duplicate of '#{titles[entry[:title]]}'"
|
@@ -41,7 +51,7 @@ module BibSync
|
|
41
51
|
end
|
42
52
|
end
|
43
53
|
|
44
|
-
warning(w.join('
|
54
|
+
warning(w.join('; '), key: entry) unless w.empty?
|
45
55
|
end
|
46
56
|
end
|
47
57
|
end
|
data/lib/bibsync/actions.rb
CHANGED
@@ -1,7 +1 @@
|
|
1
|
-
|
2
|
-
require 'bibsync/actions/synchronize_files'
|
3
|
-
require 'bibsync/actions/synchronize_metadata'
|
4
|
-
require 'bibsync/actions/validate'
|
5
|
-
require 'bibsync/actions/jabref_format'
|
6
|
-
require 'bibsync/actions/fetch_from_arxiv'
|
7
|
-
require 'bibsync/actions/find_my_citations'
|
1
|
+
Dir[File.join(File.dirname(__FILE__), 'actions', '*.rb')].each {|f| require f }
|