bibsync 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.travis.yml +10 -0
- data/Gemfile +3 -0
- data/LICENSE +21 -0
- data/README.md +88 -0
- data/Rakefile +16 -0
- data/bibsync.gemspec +4 -2
- data/lib/bibsync/actions/{check_versions.rb → check_arxiv_versions.rb} +6 -6
- data/lib/bibsync/actions/determine_arxiv_doi.rb +70 -0
- data/lib/bibsync/actions/fetch_from_arxiv.rb +11 -9
- data/lib/bibsync/actions/find_my_citations.rb +4 -4
- data/lib/bibsync/actions/jabref_format.rb +2 -2
- data/lib/bibsync/actions/synchronize_files.rb +5 -6
- data/lib/bibsync/actions/synchronize_metadata.rb +14 -57
- data/lib/bibsync/actions/validate.rb +16 -6
- data/lib/bibsync/actions.rb +1 -7
- data/lib/bibsync/bibliography.rb +60 -23
- data/lib/bibsync/command.rb +13 -8
- data/lib/bibsync/log.rb +22 -20
- data/lib/bibsync/transformer.rb +1 -1
- data/lib/bibsync/utils.rb +7 -9
- data/lib/bibsync/version.rb +1 -1
- data/test/actions/test_check_arxiv_versions.rb +4 -0
- data/test/actions/test_determine_arxiv_doi.rb +61 -0
- data/test/actions/test_fetch_from_arxiv.rb +4 -0
- data/test/actions/test_find_my_citations.rb +4 -0
- data/test/actions/test_jabref_format.rb +4 -0
- data/test/actions/test_synchronize_files.rb +4 -0
- data/test/actions/test_synchronize_metadata.rb +34 -0
- data/test/actions/test_validate.rb +4 -0
- data/test/fixture/FileWithEmbeddedArXiv.pdf +0 -0
- data/test/fixture/FileWithEmbeddedArXiv.tex +7 -0
- data/test/fixture/FileWithEmbeddedDOI.pdf +0 -0
- data/test/fixture/FileWithEmbeddedDOI.tex +7 -0
- data/test/fixture/entry.bib +8 -0
- data/test/fixture/test.bib +34 -0
- data/test/helper.rb +21 -0
- data/test/test_bibliography.rb +222 -0
- data/test/test_utils.rb +54 -0
- metadata +63 -16
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 21c0564e45a66b0339bf5b7bafaef25633adcd8e
|
4
|
+
data.tar.gz: 34a9ae41d395ba912e95accbb0c2cb87c8de69c3
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 46e87958376899e94b3bec951241bb02e3085f2e5141146477d9f8bb0de533e27c0c8d07b4c4e5a314f2d734cd386ee092362d38307e18a4d49303a746ad42c0
|
7
|
+
data.tar.gz: 46b15a8cf96461ce3fa3e45aef1585e6806177be668a20410ca940687c5d1f5667d30b1e76701b37503984c0a5ca0aa23025728b0154ee306c868ba8bcdaf1db
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2013 Daniel Mendler
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
BibSync
|
2
|
+
=======
|
3
|
+
|
4
|
+
BibSync is a tool to synchronize your paper database with a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file which might be most
|
5
|
+
useful for Physicists and Mathematicians since it supports synchronization with [DOI](http://dx.doi.org/) and [arXiv](http://arxiv.org/).
|
6
|
+
|
7
|
+
I created this tool during the work on my diploma thesis in physics since I was unhappy
|
8
|
+
with existing tools like [Mendeley](http://www.mendeley.com/). I use this tool together with Git for version control
|
9
|
+
and [JabRef](http://jabref.sourceforge.net/) for browsing. This tool adheres more to the Unix philosophy that a small tool
|
10
|
+
for each task is better than one thing which tries to solve everything. If you use [JabRef](http://jabref.sourceforge.net/)
|
11
|
+
for browsing and tagging it is unnecessary to sort the papers into different subdirectories by hand.
|
12
|
+
Just throw them all in one directory!
|
13
|
+
|
14
|
+
__Note__: This tool is derived from a script which I used during my thesis. It worked
|
15
|
+
quite well and reliably during that time. But be aware that I used Git for version control
|
16
|
+
of the [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file. So any mistakes which might be made by this tool could be reverted.
|
17
|
+
|
18
|
+
Features
|
19
|
+
--------
|
20
|
+
|
21
|
+
BibSync supports the following features:
|
22
|
+
|
23
|
+
* Synchronization between a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file and a directory containing the papers in pdf, ps or djvu format
|
24
|
+
* [JabRef](http://jabref.sourceforge.net/) file fields are generated, so you can open the existing papers directly out of [JabRef](http://jabref.sourceforge.net/)
|
25
|
+
* Downloading of [arXiv](http://arxiv.org/) or [DOI](http://dx.doi.org/) metadata
|
26
|
+
* Extraction of [arXiv](http://arxiv.org/) or [DOI](http://dx.doi.org/) id out of the file using [pdftotext](http://en.wikipedia.org/wiki/Pdftotext)
|
27
|
+
* Downloading of new versions of [arXiv](http://arxiv.org/) papers
|
28
|
+
* Simple validation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) files (Checks for missing fields etc)
|
29
|
+
* Simple transformation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) fields (Normalization of author, year and journal field...)
|
30
|
+
* Works on every platform supporting Ruby (Linux, Windows, ...)
|
31
|
+
|
32
|
+
Quick start
|
33
|
+
-----------
|
34
|
+
|
35
|
+
BibSync requires Ruby >= 1.9.2 to run. It is distributed as a RubyGems package. You can install it via
|
36
|
+
the command line
|
37
|
+
|
38
|
+
~~~
|
39
|
+
$ gem install bibsync
|
40
|
+
~~~
|
41
|
+
|
42
|
+
After that you can use the 'bibsync' tool on the command line. First, let's validate
|
43
|
+
a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file called 'thesis.bib'.
|
44
|
+
|
45
|
+
~~~
|
46
|
+
$ bibsync -b ~/thesis/thesis.bib
|
47
|
+
~~~
|
48
|
+
|
49
|
+
Then we want to synchronize all the papers in our paper directory with 'bibsync' and automatically download
|
50
|
+
the missing metadata.
|
51
|
+
|
52
|
+
~~~
|
53
|
+
$ bibsync -d ~/thesis/papers -b ~/thesis/thesis.bib
|
54
|
+
~~~
|
55
|
+
|
56
|
+
BibSync tries to download the metadata from [arxiv.org](http://arxiv.org) and [dx.doi.org](http://dx.doi.org). If you want to know more about the functions of 'bibsync' take a look at the command line help.
|
57
|
+
|
58
|
+
~~~
|
59
|
+
$ bibsync --help
|
60
|
+
~~~
|
61
|
+
|
62
|
+
My setup
|
63
|
+
--------
|
64
|
+
|
65
|
+
* BibSync for synchronizing
|
66
|
+
* [JabRef](http://jabref.sourceforge.net/) for browsing the bibliography, tagging and categorizing papers
|
67
|
+
* [Biblatex](http://www.ctan.org/pkg/biblatex) to include a bibliography in LaTeX with full Unicode support
|
68
|
+
|
69
|
+
Alternatives
|
70
|
+
------------
|
71
|
+
|
72
|
+
* [Mendeley](http://www.mendeley.com/) (Commercial, synchronizes with their server, limited disk space, bloated gui application)
|
73
|
+
* [Zotero](http://www.zotero.org/) (Firefox plugin, Open source)
|
74
|
+
|
75
|
+
A better name?
|
76
|
+
--------------
|
77
|
+
|
78
|
+
If you have a suggestion for a better name, just let me know...
|
79
|
+
|
80
|
+
Author
|
81
|
+
------
|
82
|
+
|
83
|
+
Daniel Mendler
|
84
|
+
|
85
|
+
License
|
86
|
+
-------
|
87
|
+
|
88
|
+
See LICENSE
|
data/Rakefile
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
begin
|
2
|
+
require 'bundler'
|
3
|
+
Bundler::GemHelper.install_tasks
|
4
|
+
rescue Exception
|
5
|
+
end
|
6
|
+
|
7
|
+
require 'rake/testtask'
|
8
|
+
|
9
|
+
Rake::TestTask.new :test do |t|
|
10
|
+
t.libs << 'lib' << 'test'
|
11
|
+
t.test_files = FileList['test/**/test_*.rb']
|
12
|
+
t.verbose = true
|
13
|
+
t.ruby_opts << '-w' << '-v'
|
14
|
+
end
|
15
|
+
|
16
|
+
task :default => :test
|
data/bibsync.gemspec
CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |s|
|
|
8
8
|
s.date = Date.today.to_s
|
9
9
|
s.authors = ['Daniel Mendler']
|
10
10
|
s.email = ['mail@daniel-mendler.de']
|
11
|
-
s.summary = 'BibSync is a tool to synchronize scientific papers and
|
12
|
-
s.description = 'BibSync is a tool to synchronize scientific papers and
|
11
|
+
s.summary = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
|
12
|
+
s.description = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
|
13
13
|
s.homepage = 'https://github.com/minad/bibsync'
|
14
14
|
s.rubyforge_project = s.name
|
15
15
|
|
@@ -18,4 +18,6 @@ Gem::Specification.new do |s|
|
|
18
18
|
s.require_paths = %w(lib)
|
19
19
|
|
20
20
|
s.add_runtime_dependency('nokogiri')
|
21
|
+
s.add_development_dependency('rake')
|
22
|
+
s.add_development_dependency('minitest')
|
21
23
|
end
|
@@ -1,14 +1,14 @@
|
|
1
1
|
module BibSync
|
2
2
|
module Actions
|
3
|
-
class
|
3
|
+
class CheckArXivVersions
|
4
4
|
include Log
|
5
5
|
include Utils
|
6
6
|
|
7
7
|
SliceSize = 20
|
8
8
|
|
9
9
|
def initialize(options)
|
10
|
-
raise '
|
11
|
-
raise '
|
10
|
+
raise 'Option :bib is required' unless @bib = options[:bib]
|
11
|
+
raise 'Option :dir is required' unless @dir = options[:dir]
|
12
12
|
@update = options[:update]
|
13
13
|
end
|
14
14
|
|
@@ -16,16 +16,16 @@ module BibSync
|
|
16
16
|
notice 'Check for newer version on arXiv'
|
17
17
|
@bib.select {|e| e[:arxiv] }.each_slice(SliceSize) do |entry|
|
18
18
|
begin
|
19
|
-
xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{entry.map{|e| arxiv_id(e, :
|
19
|
+
xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{entry.map{|e| arxiv_id(e, version: false, prefix: true) }.join(',')}&max_results=#{SliceSize}")
|
20
20
|
xml.xpath('//entry/id').map(&:content).each_with_index do |id, i|
|
21
21
|
id.gsub!('http://arxiv.org/abs/', '')
|
22
22
|
if id != entry[i][:arxiv]
|
23
|
-
info("#{entry[i][:arxiv]} replaced by http://arxiv.org/pdf/#{id}", :
|
23
|
+
info("#{entry[i][:arxiv]} replaced by http://arxiv.org/pdf/#{id}", key: entry[i])
|
24
24
|
arxiv_download(@dir, id) if @update
|
25
25
|
end
|
26
26
|
end
|
27
27
|
rescue => ex
|
28
|
-
error('arXiv query failed', :
|
28
|
+
error('arXiv query failed', ex: ex)
|
29
29
|
end
|
30
30
|
end
|
31
31
|
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module BibSync
|
2
|
+
module Actions
|
3
|
+
class DetermineArXivDOI
|
4
|
+
include Utils
|
5
|
+
include Log
|
6
|
+
|
7
|
+
def initialize(options)
|
8
|
+
raise 'Option :bib is required' unless @bib = options[:bib]
|
9
|
+
@force = options[:resync]
|
10
|
+
end
|
11
|
+
|
12
|
+
def run
|
13
|
+
notice 'Determine arXiv and DOI identifiers'
|
14
|
+
|
15
|
+
@bib.each do |entry|
|
16
|
+
next if entry.comment? ||
|
17
|
+
(entry[:doi] && entry[:arxiv]) ||
|
18
|
+
(!@force && entry[:title] && entry[:author] && entry[:year])
|
19
|
+
|
20
|
+
determine_arxiv_and_doi(entry)
|
21
|
+
|
22
|
+
@bib.save
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
|
28
|
+
def determine_arxiv_and_doi(entry)
|
29
|
+
if file = entry.file
|
30
|
+
if file[:type] == :PDF && !entry[:arxiv] && !entry[:doi]
|
31
|
+
debug('Searching for arXiv or doi identifier in pdf file', key: entry)
|
32
|
+
text = `pdftotext -f 1 -l 2 #{Shellwords.escape file[:path]} - 2>/dev/null`
|
33
|
+
entry[:arxiv] = $1 if text =~ /arXiv:\s*([\w\.\/\-]+)/
|
34
|
+
entry[:doi] = $1 if text =~ /doi:\s*([\w\.\/\-]+)/i
|
35
|
+
end
|
36
|
+
|
37
|
+
if !entry[:arxiv] && file[:name] =~ /^(\d+.\d+v\d+)\.\w+$/
|
38
|
+
debug('Interpreting file name as arXiv identifier', key: entry)
|
39
|
+
entry[:arxiv] = $1
|
40
|
+
end
|
41
|
+
|
42
|
+
if !entry[:doi] && file[:name] =~ /^(PhysRev.*?|RevModPhys.*?)\.\w+$/
|
43
|
+
debug('Interpreting file name as doi identifier', key: entry)
|
44
|
+
entry[:doi] = "10.1103/#{$1}"
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
if !entry[:arxiv] && entry[:doi]
|
49
|
+
begin
|
50
|
+
info('Fetch missing arXiv identifier', key: entry)
|
51
|
+
xml = fetch_xml("http://export.arxiv.org/api/query?search_query=doi:#{entry[:doi]}&max_results=1")
|
52
|
+
if xml.xpath('//entry/doi').map(&:content).first == entry[:doi]
|
53
|
+
id = xml.xpath('//entry/id').map(&:content).first
|
54
|
+
if id =~ %r{\Ahttp://arxiv.org/abs/(.+)\Z}
|
55
|
+
entry[:arxiv] = $1
|
56
|
+
end
|
57
|
+
end
|
58
|
+
rescue => ex
|
59
|
+
error('arXiv query by DOI failed', ex: ex, key: entry)
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
unless entry[:arxiv] || entry[:doi]
|
64
|
+
warning('No arXiv or DOI identifier found', key: entry)
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
@@ -7,17 +7,19 @@ module BibSync
|
|
7
7
|
include Utils
|
8
8
|
|
9
9
|
def initialize(options)
|
10
|
-
raise '
|
11
|
-
raise '
|
10
|
+
raise 'Option :fetch is required' unless @fetch = options[:fetch]
|
11
|
+
raise 'Option :dir is required' unless @dir = options[:dir]
|
12
12
|
end
|
13
13
|
|
14
14
|
def run
|
15
|
-
|
15
|
+
arxivs = []
|
16
16
|
urls = []
|
17
17
|
|
18
18
|
@fetch.each do |url|
|
19
|
-
if url =~
|
20
|
-
|
19
|
+
if url =~ /\A(\d+\.\d+)(v\d+)?\Z/
|
20
|
+
arxivs << $1
|
21
|
+
elsif url =~ %r{\Ahttp://arxiv.org/abs/(\d+\.\d+)\Z}
|
22
|
+
arxivs << $1
|
21
23
|
else
|
22
24
|
urls << url
|
23
25
|
end
|
@@ -31,18 +33,18 @@ module BibSync
|
|
31
33
|
end
|
32
34
|
end
|
33
35
|
|
34
|
-
unless
|
36
|
+
unless arxivs.empty?
|
35
37
|
notice 'Downloading from arXiv'
|
36
|
-
|
38
|
+
arxivs.each_slice(SliceSize) do |ids|
|
37
39
|
begin
|
38
40
|
xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{ids.join(',')}&max_results=#{SliceSize}")
|
39
41
|
xml.xpath('//entry/id').map(&:content).each_with_index do |id, i|
|
40
42
|
id.gsub!('http://arxiv.org/abs/', '')
|
41
|
-
info 'arXiv download', :
|
43
|
+
info 'arXiv download', key: id
|
42
44
|
arxiv_download(@dir, id)
|
43
45
|
end
|
44
46
|
rescue => ex
|
45
|
-
error('arXiv query failed', :
|
47
|
+
error('arXiv query failed', ex: ex)
|
46
48
|
end
|
47
49
|
end
|
48
50
|
end
|
@@ -5,8 +5,8 @@ module BibSync
|
|
5
5
|
include Utils
|
6
6
|
|
7
7
|
def initialize(options)
|
8
|
-
raise '
|
9
|
-
raise '
|
8
|
+
raise 'Option :bib is required' unless @bib = options[:bib]
|
9
|
+
raise 'Option :citedbyme is required' unless @dir = options[:citedbyme]
|
10
10
|
raise "#{@dir} is not a directory" unless File.directory?(@dir)
|
11
11
|
end
|
12
12
|
|
@@ -19,7 +19,7 @@ module BibSync
|
|
19
19
|
$1.split(/\s*,\s*/).each do |key|
|
20
20
|
key.strip!
|
21
21
|
file = @bib.relative_path(file)
|
22
|
-
debug("Cited in #{file}", :
|
22
|
+
debug("Cited in #{file}", key: key)
|
23
23
|
(cites[key] ||= []) << file
|
24
24
|
end
|
25
25
|
end
|
@@ -35,7 +35,7 @@ module BibSync
|
|
35
35
|
if @bib[key]
|
36
36
|
@bib[key][:citedbyme] = files
|
37
37
|
else
|
38
|
-
warning("Cited in #{files} but not found in #{@bib.file}", :
|
38
|
+
warning("Cited in #{files} but not found in #{@bib.file}", key: key)
|
39
39
|
end
|
40
40
|
end
|
41
41
|
|
@@ -1,11 +1,11 @@
|
|
1
1
|
module BibSync
|
2
2
|
module Actions
|
3
|
-
class
|
3
|
+
class JabRefFormat
|
4
4
|
include Utils
|
5
5
|
include Log
|
6
6
|
|
7
7
|
def initialize(options)
|
8
|
-
raise '
|
8
|
+
raise 'Option :bib is required' unless @bib = options[:bib]
|
9
9
|
end
|
10
10
|
|
11
11
|
def run
|
@@ -7,8 +7,8 @@ module BibSync
|
|
7
7
|
FileTypes = %w(djvu pdf ps)
|
8
8
|
|
9
9
|
def initialize(options)
|
10
|
-
raise '
|
11
|
-
raise '
|
10
|
+
raise 'Option :bib is required' unless @bib = options[:bib]
|
11
|
+
raise 'Option :dir is required' unless @dir = options[:dir]
|
12
12
|
end
|
13
13
|
|
14
14
|
def run
|
@@ -17,16 +17,15 @@ module BibSync
|
|
17
17
|
files = {}
|
18
18
|
Dir[File.join(@dir, "**/*.{#{FileTypes.join(',')}}")].sort.each do |file|
|
19
19
|
name = File.basename(file)
|
20
|
-
key
|
20
|
+
key = name_without_ext(name)
|
21
21
|
raise "Duplicate file #{name}" if files[key]
|
22
22
|
files[key] = file
|
23
23
|
end
|
24
24
|
|
25
25
|
files.each do |key, file|
|
26
26
|
unless entry = @bib[key]
|
27
|
-
info('New file', :
|
28
|
-
entry = Bibliography::Entry.new
|
29
|
-
entry.key = key
|
27
|
+
info('New file', key: key)
|
28
|
+
entry = Bibliography::Entry.new(key: key)
|
30
29
|
@bib << entry
|
31
30
|
end
|
32
31
|
|
@@ -5,7 +5,7 @@ module BibSync
|
|
5
5
|
include Log
|
6
6
|
|
7
7
|
def initialize(options)
|
8
|
-
raise '
|
8
|
+
raise 'Option :bib is required' unless @bib = options[:bib]
|
9
9
|
@force = options[:resync]
|
10
10
|
end
|
11
11
|
|
@@ -16,10 +16,8 @@ module BibSync
|
|
16
16
|
next if entry.comment?
|
17
17
|
|
18
18
|
if @force || !(entry[:title] && entry[:author] && entry[:year])
|
19
|
-
determine_arxiv_and_doi(entry)
|
20
|
-
|
21
19
|
if entry[:arxiv]
|
22
|
-
if entry.key == arxiv_id(entry, :
|
20
|
+
if entry.key == arxiv_id(entry, prefix: false, version: true)
|
23
21
|
entry = rename_arxiv_file(entry)
|
24
22
|
next unless entry
|
25
23
|
end
|
@@ -40,28 +38,28 @@ module BibSync
|
|
40
38
|
private
|
41
39
|
|
42
40
|
def update_aps_abstract(entry)
|
43
|
-
info("Downloading APS abstract", :
|
41
|
+
info("Downloading APS abstract", key: entry)
|
44
42
|
html = fetch_html("http://link.aps.org/doi/#{entry[:doi]}")
|
45
43
|
entry[:abstract] = html.css('.aps-abstractbox').map(&:content).first
|
46
44
|
rescue => ex
|
47
|
-
error('Abstract download failed', :
|
45
|
+
error('Abstract download failed', key: entry, ex: ex)
|
48
46
|
end
|
49
47
|
|
50
48
|
def update_doi(entry)
|
51
|
-
info('Downloading
|
49
|
+
info('Downloading DOI metadata', key: entry)
|
52
50
|
text = fetch("http://dx.doi.org/#{entry[:doi]}", 'Accept' => 'text/bibliography; style=bibtex')
|
53
51
|
raise text if text == 'Unknown DOI'
|
54
52
|
Bibliography::Entry.parse(text).each {|k, v| entry[k] = v }
|
55
53
|
rescue => ex
|
56
54
|
entry.delete(:doi)
|
57
|
-
error('
|
55
|
+
error('DOI download failed', key: entry, ex: ex)
|
58
56
|
end
|
59
57
|
|
60
58
|
# Rename arxiv file if key contains version
|
61
59
|
def rename_arxiv_file(entry)
|
62
60
|
file = entry.file
|
63
61
|
|
64
|
-
key = arxiv_id(entry, :
|
62
|
+
key = arxiv_id(entry, prefix: false, version: false)
|
65
63
|
|
66
64
|
if old_entry = @bib[key]
|
67
65
|
# Existing entry found
|
@@ -71,7 +69,7 @@ module BibSync
|
|
71
69
|
entry[:arxiv] =~ /v(\d+)$/
|
72
70
|
new_version = $1
|
73
71
|
if old_version && new_version && old_version >= new_version
|
74
|
-
info('Not updating existing entry with older version', :
|
72
|
+
info('Not updating existing entry with older version', key: old_entry)
|
75
73
|
File.delete(file[:path]) if file
|
76
74
|
return nil
|
77
75
|
end
|
@@ -79,14 +77,14 @@ module BibSync
|
|
79
77
|
old_entry[:arxiv] = entry[:arxiv]
|
80
78
|
old_entry[:doi] = entry[:doi]
|
81
79
|
entry = old_entry
|
82
|
-
info('Updating existing entry', :
|
80
|
+
info('Updating existing entry', key: entry)
|
83
81
|
else
|
84
82
|
# This is a new entry
|
85
83
|
entry.key = key
|
86
84
|
end
|
87
85
|
|
88
86
|
if file
|
89
|
-
new_path = file[:path].sub(arxiv_id(entry, :
|
87
|
+
new_path = file[:path].sub(arxiv_id(entry, prefix: false, version: true), key)
|
90
88
|
File.rename(file[:path], new_path)
|
91
89
|
entry.file = new_path
|
92
90
|
end
|
@@ -97,8 +95,8 @@ module BibSync
|
|
97
95
|
end
|
98
96
|
|
99
97
|
def update_arxiv(entry)
|
100
|
-
info('Downloading arXiv metadata', :
|
101
|
-
xml = fetch_xml("http://export.arxiv.org/oai2?verb=GetRecord&identifier=oai:arXiv.org:#{arxiv_id(entry, :
|
98
|
+
info('Downloading arXiv metadata', key: entry)
|
99
|
+
xml = fetch_xml("http://export.arxiv.org/oai2?verb=GetRecord&identifier=oai:arXiv.org:#{arxiv_id(entry, prefix: true, version: false)}&metadataPrefix=arXiv")
|
102
100
|
error = xml.xpath('//error').map(&:content).first
|
103
101
|
raise error if error
|
104
102
|
|
@@ -108,7 +106,7 @@ module BibSync
|
|
108
106
|
entry[:author] = xml.xpath('//arXiv/authors/author').map do |author|
|
109
107
|
"{#{author.xpath('keyname').map(&:content).first}}, {#{author.xpath('forenames').map(&:content).first}}"
|
110
108
|
end.join(' and ')
|
111
|
-
entry[:journal] =
|
109
|
+
entry[:journal] = 'ArXiv e-prints'
|
112
110
|
entry[:eprint] = entry[:arxiv]
|
113
111
|
entry[:archiveprefix] = 'arXiv'
|
114
112
|
date = xml.xpath('//arXiv/updated').map(&:content).first || xml.xpath('//arXiv/created').map(&:content).first
|
@@ -124,49 +122,8 @@ module BibSync
|
|
124
122
|
entry[:url] = "http://arxiv.org/abs/#{entry[:arxiv]}"
|
125
123
|
rescue => ex
|
126
124
|
entry.delete(:arxiv)
|
127
|
-
error('arXiv download failed', :
|
125
|
+
error('arXiv download failed', key: entry, ex: ex)
|
128
126
|
end
|
129
|
-
|
130
|
-
def determine_arxiv_and_doi(entry)
|
131
|
-
if file = entry.file
|
132
|
-
if file[:type] == :PDF && !entry[:arxiv] && !entry[:doi]
|
133
|
-
debug('Searching for arXiv or doi identifier in pdf file', :key => entry)
|
134
|
-
text = `pdftotext -f 1 -l 2 #{Shellwords.escape file[:path]} - 2>/dev/null`
|
135
|
-
entry[:arxiv] = $1 if text =~ /arXiv:\s*([\w\.\/\-]+)/
|
136
|
-
entry[:doi] = $1 if text =~ /doi:\s*([\w\.\/\-]+)/i
|
137
|
-
end
|
138
|
-
|
139
|
-
if !entry[:arxiv] && file[:name] =~ /^(\d+.\d+v\d+)\.\w+$/
|
140
|
-
debug('Interpreting file name as arXiv identifier', :key => entry)
|
141
|
-
entry[:arxiv] = $1
|
142
|
-
end
|
143
|
-
|
144
|
-
if !entry[:doi] && file[:name] =~ /^(PhysRev.*?|RevModPhys.*?)\.\w+$/
|
145
|
-
debug('Interpreting file name as doi identifier', :key => entry)
|
146
|
-
entry[:doi] = "10.1103/#{$1}"
|
147
|
-
end
|
148
|
-
end
|
149
|
-
|
150
|
-
if !entry[:arxiv] && entry[:doi]
|
151
|
-
begin
|
152
|
-
info('Fetch missing arXiv identifier', :key => entry)
|
153
|
-
xml = fetch_xml("http://export.arxiv.org/api/query?search_query=doi:#{entry[:doi]}&max_results=1")
|
154
|
-
if xml.xpath('//entry/doi').map(&:content).first == entry[:doi]
|
155
|
-
id = xml.xpath('//entry/id').map(&:content).first
|
156
|
-
if id =~ %r{\Ahttp://arxiv.org/abs/(.+)\Z}
|
157
|
-
entry[:arxiv] = $1
|
158
|
-
end
|
159
|
-
end
|
160
|
-
rescue => ex
|
161
|
-
error('arXiv doi query failed', :ex => ex, :key => entry)
|
162
|
-
end
|
163
|
-
end
|
164
|
-
|
165
|
-
unless entry[:arxiv] || entry[:doi]
|
166
|
-
warning('No arXiv or doi identifier found', :key => entry)
|
167
|
-
end
|
168
|
-
end
|
169
|
-
|
170
127
|
end
|
171
128
|
end
|
172
129
|
end
|
@@ -10,7 +10,7 @@ module BibSync
|
|
10
10
|
|
11
11
|
def run
|
12
12
|
notice 'Check validity'
|
13
|
-
titles, arxivs = {}, {}
|
13
|
+
titles, arxivs, dois = {}, {}, {}
|
14
14
|
|
15
15
|
@bib.each do |entry|
|
16
16
|
next if entry.comment?
|
@@ -18,14 +18,16 @@ module BibSync
|
|
18
18
|
w = []
|
19
19
|
|
20
20
|
file = entry.file
|
21
|
-
w << 'Missing file' unless file && File.file?(file[:path])
|
22
21
|
|
23
|
-
|
22
|
+
missing = []
|
23
|
+
missing << :file unless file && File.file?(file[:path])
|
24
|
+
missing += [:title, :author, :year, :abstract].reject {|k| entry[k] }
|
25
|
+
w << "Missing #{missing.map(&:to_s).sort.join(', ')}" unless missing.empty?
|
24
26
|
|
25
|
-
w << '
|
27
|
+
w << 'File name does not match entry key' if name_without_ext(file[:name]) != entry.key if file
|
26
28
|
|
27
29
|
if entry[:arxiv]
|
28
|
-
id = arxiv_id(entry, :
|
30
|
+
id = arxiv_id(entry, version: false, prefix: true)
|
29
31
|
if arxivs.include?(id)
|
30
32
|
w << "ArXiv duplicate of '#{arxivs[id]}'"
|
31
33
|
else
|
@@ -33,6 +35,14 @@ module BibSync
|
|
33
35
|
end
|
34
36
|
end
|
35
37
|
|
38
|
+
if id = entry[:doi]
|
39
|
+
if dois.include?(id)
|
40
|
+
w << "DOI duplicate of '#{dois[id]}'"
|
41
|
+
else
|
42
|
+
dois[id] = entry.key
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
36
46
|
if entry[:title]
|
37
47
|
if titles.include?(entry[:title])
|
38
48
|
w << "Title duplicate of '#{titles[entry[:title]]}'"
|
@@ -41,7 +51,7 @@ module BibSync
|
|
41
51
|
end
|
42
52
|
end
|
43
53
|
|
44
|
-
warning(w.join('
|
54
|
+
warning(w.join('; '), key: entry) unless w.empty?
|
45
55
|
end
|
46
56
|
end
|
47
57
|
end
|
data/lib/bibsync/actions.rb
CHANGED
@@ -1,7 +1 @@
|
|
1
|
-
|
2
|
-
require 'bibsync/actions/synchronize_files'
|
3
|
-
require 'bibsync/actions/synchronize_metadata'
|
4
|
-
require 'bibsync/actions/validate'
|
5
|
-
require 'bibsync/actions/jabref_format'
|
6
|
-
require 'bibsync/actions/fetch_from_arxiv'
|
7
|
-
require 'bibsync/actions/find_my_citations'
|
1
|
+
Dir[File.join(File.dirname(__FILE__), 'actions', '*.rb')].each {|f| require f }
|