bibsync 0.0.5 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/.travis.yml +3 -1
- data/README.md +17 -1
- data/bibsync.gemspec +2 -2
- data/lib/bibsync.rb +1 -1
- data/lib/bibsync/actions/check_arxiv_versions.rb +2 -2
- data/lib/bibsync/actions/determine_arxiv_doi.rb +3 -4
- data/lib/bibsync/actions/fetch_from_arxiv.rb +3 -3
- data/lib/bibsync/actions/find_my_citations.rb +0 -2
- data/lib/bibsync/actions/synchronize_files.rb +0 -2
- data/lib/bibsync/actions/synchronize_metadata.rb +42 -25
- data/lib/bibsync/bibliography.rb +7 -2
- data/lib/bibsync/command.rb +4 -3
- data/lib/bibsync/utils.rb +1 -7
- data/lib/bibsync/version.rb +1 -1
- data/test/test_utils.rb +1 -7
- metadata +5 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5578a98ae327ca6b8ee4b5fb0da4b2bbd1786116
|
4
|
+
data.tar.gz: eb56b9ab2ce3d9f1b6599da6a1b4ca0c76db220e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3050a0ec740223617d6f700bca7725443e3ab2b1a1cce7ef46c6091ed777d18b3bdc3f6f9ad17abcce3b40f923959069960d977128c112cfbe84e933a3bcfbcd
|
7
|
+
data.tar.gz: d4692dcd5ce86c2cfaf6698a7c77ca5450487067077ba87804325b54cf77a2e7654ae025914819476be79f649e56d21e1c5e056402e46dc02a5d8a3113ed0a57
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -27,11 +27,21 @@ BibSync supports the following features:
|
|
27
27
|
* Downloading of new versions of [arXiv](http://arxiv.org/) papers
|
28
28
|
* Simple validation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) files (Checks for missing fields etc)
|
29
29
|
* Simple transformation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) fields (Normalization of author, year and journal field...)
|
30
|
-
* Works under every platform supporting Ruby (Linux, Windows, ...)
|
30
|
+
* Works under every platform supporting Ruby and `pdftotext` (Linux, Windows, ...)
|
31
31
|
|
32
32
|
Quick start
|
33
33
|
-----------
|
34
34
|
|
35
|
+
At first you have to ensure that you have the `pdftotext` program available on your `$PATH`. Under Debian you can install
|
36
|
+
the package using `apt-get` as follows
|
37
|
+
|
38
|
+
~~~
|
39
|
+
$ apt-get install poppler-utils
|
40
|
+
$ pdftotext
|
41
|
+
pdftotext version 0.24.1
|
42
|
+
...
|
43
|
+
~~~
|
44
|
+
|
35
45
|
BibSync requires Ruby >= 1.9.2 to run. It is distributed as a RubyGems package. You can install it via
|
36
46
|
the command line
|
37
47
|
|
@@ -39,6 +49,12 @@ the command line
|
|
39
49
|
$ gem install bibsync
|
40
50
|
~~~
|
41
51
|
|
52
|
+
And for updating, you write
|
53
|
+
|
54
|
+
~~~
|
55
|
+
$ gem update bibsync
|
56
|
+
~~~
|
57
|
+
|
42
58
|
After that you can use the 'bibsync' tool on the command line. At first let's validate
|
43
59
|
a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file called 'thesis.bib'.
|
44
60
|
|
data/bibsync.gemspec
CHANGED
@@ -9,15 +9,15 @@ Gem::Specification.new do |s|
|
|
9
9
|
s.authors = ['Daniel Mendler']
|
10
10
|
s.email = ['mail@daniel-mendler.de']
|
11
11
|
s.summary = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
|
12
|
-
s.description = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
|
12
|
+
s.description = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files. It automatically downloads the metadata from dx.doi.org and arxiv.org.'
|
13
13
|
s.homepage = 'https://github.com/minad/bibsync'
|
14
14
|
s.rubyforge_project = s.name
|
15
|
+
s.license = 'MIT'
|
15
16
|
|
16
17
|
s.files = `git ls-files`.split("\n")
|
17
18
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
18
19
|
s.require_paths = %w(lib)
|
19
20
|
|
20
|
-
s.add_runtime_dependency('nokogiri')
|
21
21
|
s.add_runtime_dependency('faraday')
|
22
22
|
s.add_runtime_dependency('faraday_middleware')
|
23
23
|
s.add_development_dependency('rake')
|
data/lib/bibsync.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
require 'nokogiri'
|
2
1
|
require 'faraday'
|
3
2
|
require 'faraday_middleware'
|
4
3
|
require 'shellwords'
|
5
4
|
require 'date'
|
6
5
|
require 'pathname'
|
7
6
|
require 'forwardable'
|
7
|
+
require 'rexml/document'
|
8
8
|
require 'bibsync/version'
|
9
9
|
require 'bibsync/utils'
|
10
10
|
require 'bibsync/log'
|
@@ -17,8 +17,8 @@ module BibSync
|
|
17
17
|
@bib.select {|e| e[:arxiv] }.each_slice(SliceSize) do |entry|
|
18
18
|
begin
|
19
19
|
xml = fetch_xml('http://export.arxiv.org/api/query', id_list: entry.map{|e| arxiv_id(e, version: false, prefix: true) }.join(','), max_results: SliceSize)
|
20
|
-
xml.
|
21
|
-
id.gsub
|
20
|
+
xml.get_elements('//entry').each_with_index do |e, i|
|
21
|
+
id = e.elements['id'].text.gsub('http://arxiv.org/abs/', '')
|
22
22
|
if id != entry[i][:arxiv]
|
23
23
|
info("#{entry[i][:arxiv]} replaced by http://arxiv.org/pdf/#{id}", key: entry[i])
|
24
24
|
arxiv_download(@dir, id) if @update
|
@@ -18,8 +18,6 @@ module BibSync
|
|
18
18
|
(!@force && entry[:title] && entry[:author] && entry[:year])
|
19
19
|
|
20
20
|
determine_arxiv_and_doi(entry)
|
21
|
-
|
22
|
-
@bib.save
|
23
21
|
end
|
24
22
|
end
|
25
23
|
|
@@ -49,8 +47,9 @@ module BibSync
|
|
49
47
|
begin
|
50
48
|
info('Fetch missing arXiv identifier', key: entry)
|
51
49
|
xml = fetch_xml('http://export.arxiv.org/api/query', search_query: "doi:#{entry[:doi]}", max_results: 1)
|
52
|
-
|
53
|
-
|
50
|
+
doi = xml.elements['//arxiv:doi']
|
51
|
+
if doi && doi.text == entry[:doi]
|
52
|
+
id = xml.elements['//entry/id'].text
|
54
53
|
if id =~ %r{\Ahttp://arxiv.org/abs/(.+)\Z}
|
55
54
|
entry[:arxiv] = $1
|
56
55
|
end
|
@@ -18,7 +18,7 @@ module BibSync
|
|
18
18
|
@fetch.each do |url|
|
19
19
|
if url =~ /\A(\d+\.\d+)(v\d+)?\Z/
|
20
20
|
arxivs << $1
|
21
|
-
elsif url =~ %r{\Ahttp://arxiv.org/abs/(\d+\.\d+)\Z}
|
21
|
+
elsif url =~ %r{\Ahttp://arxiv.org/abs/(\d+\.\d+)(v\d+)?\Z}
|
22
22
|
arxivs << $1
|
23
23
|
else
|
24
24
|
urls << url
|
@@ -38,8 +38,8 @@ module BibSync
|
|
38
38
|
arxivs.each_slice(SliceSize) do |ids|
|
39
39
|
begin
|
40
40
|
xml = fetch_xml('http://export.arxiv.org/api/query', id_list: ids.join(','), max_results: SliceSize)
|
41
|
-
xml.
|
42
|
-
id.gsub
|
41
|
+
xml.each_element('//entry/id') do |id|
|
42
|
+
id = id.text.gsub('http://arxiv.org/abs/', '')
|
43
43
|
info 'arXiv download', key: id
|
44
44
|
arxiv_download(@dir, id)
|
45
45
|
end
|
@@ -15,6 +15,8 @@ module BibSync
|
|
15
15
|
@bib.to_a.each do |entry|
|
16
16
|
next if entry.comment?
|
17
17
|
|
18
|
+
entry.delete(:abstract) if @force
|
19
|
+
|
18
20
|
if @force || !(entry[:title] && entry[:author] && entry[:year])
|
19
21
|
if entry[:arxiv]
|
20
22
|
if entry.key == arxiv_id(entry, prefix: false, version: true)
|
@@ -27,11 +29,12 @@ module BibSync
|
|
27
29
|
update_doi(entry) if entry[:doi]
|
28
30
|
end
|
29
31
|
|
30
|
-
if entry[:doi] =~ /\A10\.1103\// &&
|
32
|
+
if entry[:doi] =~ /\A10\.1103\// && !entry[:abstract]
|
31
33
|
update_aps_abstract(entry)
|
32
34
|
end
|
33
35
|
|
34
|
-
|
36
|
+
# Add timestamp when this entry was added
|
37
|
+
entry[:added] ||= Date.today.to_s
|
35
38
|
end
|
36
39
|
end
|
37
40
|
|
@@ -39,20 +42,34 @@ module BibSync
|
|
39
42
|
|
40
43
|
def update_aps_abstract(entry)
|
41
44
|
info("Downloading APS abstract", key: entry)
|
42
|
-
html =
|
43
|
-
|
45
|
+
html = fetch("http://link.aps.org/doi/#{entry[:doi]}")
|
46
|
+
if html =~ %r{<div class='aps-abstractbox'>(.*?)</div>}
|
47
|
+
entry[:abstract] = $1.gsub(/<[^>]+>/, '')
|
48
|
+
end
|
44
49
|
rescue => ex
|
45
50
|
error('Abstract download failed', key: entry, ex: ex)
|
46
51
|
end
|
47
52
|
|
48
53
|
def update_doi(entry)
|
49
|
-
|
50
|
-
|
54
|
+
url = "http://dx.doi.org/#{entry[:doi]}"
|
55
|
+
info("Downloading DOI metadata from #{url}", key: entry)
|
56
|
+
text = fetch(url, nil, 'Accept' => 'text/bibliography; style=bibtex')
|
51
57
|
raise text if text == 'Unknown DOI'
|
52
58
|
Entry.parse(text).each {|k, v| entry[k] = v }
|
53
59
|
rescue => ex
|
54
|
-
entry.delete(:doi)
|
55
60
|
error('DOI download failed', key: entry, ex: ex)
|
61
|
+
# dx.doi.org shows spurious 500 errors
|
62
|
+
if ex.respond_to?(:response) && ex.response[:status] == 500
|
63
|
+
tries ||= 0
|
64
|
+
tries += 1
|
65
|
+
if tries < 10
|
66
|
+
info('Retrying...', key: entry)
|
67
|
+
retry
|
68
|
+
else
|
69
|
+
error('Giving up :(', key: entry)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
entry.delete(:doi)
|
56
73
|
end
|
57
74
|
|
58
75
|
# Rename arxiv file if key contains version
|
@@ -89,36 +106,36 @@ module BibSync
|
|
89
106
|
entry.file = new_path
|
90
107
|
end
|
91
108
|
|
92
|
-
@bib.save
|
93
|
-
|
94
109
|
entry
|
95
110
|
end
|
96
111
|
|
97
112
|
def update_arxiv(entry)
|
98
113
|
info('Downloading arXiv metadata', key: entry)
|
114
|
+
|
99
115
|
xml = fetch_xml('http://export.arxiv.org/oai2', verb: 'GetRecord', identifier: "oai:arXiv.org:#{arxiv_id(entry, prefix: true, version: false)}", metadataPrefix: 'arXiv')
|
100
|
-
error = xml.
|
101
|
-
raise error if error
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
entry[:
|
106
|
-
entry[:
|
107
|
-
|
116
|
+
error = xml.elements['//error']
|
117
|
+
raise error.text if error
|
118
|
+
|
119
|
+
arXiv = xml.elements['//arXiv']
|
120
|
+
|
121
|
+
entry[:title] = arXiv.elements['title'].text
|
122
|
+
entry[:abstract] = arXiv.elements['abstract'].text
|
123
|
+
entry[:arxivcategories] = arXiv.elements['categories'].text
|
124
|
+
entry[:primaryclass] = entry[:arxivcategories].split(/\s+/).first
|
125
|
+
entry[:author] = arXiv.get_elements('authors/author').map do |author|
|
126
|
+
"{#{author.elements['keyname'].text}}, {#{author.elements['forenames'].text}}"
|
108
127
|
end.join(' and ')
|
109
128
|
entry[:journal] = 'ArXiv e-prints'
|
110
129
|
entry[:eprint] = entry[:arxiv]
|
111
130
|
entry[:archiveprefix] = 'arXiv'
|
112
|
-
|
113
|
-
|
131
|
+
entry[:arxivcreated] = arXiv.elements['created'].text if arXiv.elements['created']
|
132
|
+
entry[:arxivupdated] = arXiv.elements['updated'].text if arXiv.elements['updated']
|
133
|
+
date = Date.parse(entry[:arxivupdated] || entry[:arxivcreated])
|
114
134
|
entry[:year] = date.year
|
115
135
|
entry[:month] = Literal.new(%w(jan feb mar apr may jun jul aug sep oct nov dec)[date.month - 1])
|
116
|
-
doi =
|
117
|
-
entry[:
|
118
|
-
|
119
|
-
entry[:journal] = journal if journal
|
120
|
-
comments = xml.xpath('//arXiv/comments').map(&:content).first
|
121
|
-
entry[:comments] = comments if comments
|
136
|
+
entry[:doi] = arXiv.elements['doi'].text if arXiv.elements['doi']
|
137
|
+
entry[:journal] = arXiv.elements['journal-ref'].text if arXiv.elements['journal-ref']
|
138
|
+
entry[:comments] = arXiv.elements['comments'].text if arXiv.elements['comments']
|
122
139
|
entry[:url] = "http://arxiv.org/abs/#{entry[:arxiv]}"
|
123
140
|
rescue => ex
|
124
141
|
entry.delete(:arxiv)
|
data/lib/bibsync/bibliography.rb
CHANGED
@@ -55,8 +55,13 @@ module BibSync
|
|
55
55
|
raise 'No filename given' unless @file
|
56
56
|
if @dirty
|
57
57
|
@save_hook.call(self) if @save_hook
|
58
|
-
|
59
|
-
|
58
|
+
tmpfile = "#{@file}.tmp"
|
59
|
+
begin
|
60
|
+
File.open(tmpfile, 'w') {|f| f.write(self) }
|
61
|
+
File.rename(tmpfile, @file)
|
62
|
+
ensure
|
63
|
+
File.unlink(tmpfile) rescue nil
|
64
|
+
end
|
60
65
|
@dirty = false
|
61
66
|
true
|
62
67
|
else
|
data/lib/bibsync/command.rb
CHANGED
@@ -83,14 +83,15 @@ module BibSync
|
|
83
83
|
|
84
84
|
def process
|
85
85
|
if @args.size != 0
|
86
|
-
|
86
|
+
puts 'Too many arguments'
|
87
87
|
puts @opts
|
88
88
|
exit
|
89
89
|
end
|
90
90
|
|
91
91
|
if @options[:bib]
|
92
|
-
@options[:bib] = Bibliography.new(@options[:bib])
|
93
|
-
|
92
|
+
bib = @options[:bib] = Bibliography.new(@options[:bib])
|
93
|
+
bib.save_hook = Transformer.new
|
94
|
+
at_exit { bib.save }
|
94
95
|
end
|
95
96
|
|
96
97
|
actions = []
|
data/lib/bibsync/utils.rb
CHANGED
@@ -27,13 +27,7 @@ module BibSync
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def fetch_xml(url, params = nil, headers = nil)
|
30
|
-
|
31
|
-
xml.remove_namespaces!
|
32
|
-
xml
|
33
|
-
end
|
34
|
-
|
35
|
-
def fetch_html(url, params = nil, headers = nil)
|
36
|
-
Nokogiri::HTML(fetch(url, params, headers))
|
30
|
+
REXML::Document.new(fetch(url, params, headers)).root
|
37
31
|
end
|
38
32
|
|
39
33
|
def arxiv_id(arxiv, opts = {})
|
data/lib/bibsync/version.rb
CHANGED
data/test/test_utils.rb
CHANGED
@@ -24,13 +24,7 @@ describe BibSync::Utils do
|
|
24
24
|
|
25
25
|
describe '#fetch_xml' do
|
26
26
|
it 'fetches xml' do
|
27
|
-
fetch_xml('http://export.arxiv.org/oai2', verb: 'GetRecord', identifier: 'oai:arXiv.org:1208.2881', metadataPrefix: 'arXiv').must_be_instance_of
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
describe '#fetch_html' do
|
32
|
-
it 'fetches html' do
|
33
|
-
fetch_html('http://google.com').must_be_instance_of Nokogiri::HTML::Document
|
27
|
+
fetch_xml('http://export.arxiv.org/oai2', verb: 'GetRecord', identifier: 'oai:arXiv.org:1208.2881', metadataPrefix: 'arXiv').must_be_instance_of REXML::Element
|
34
28
|
end
|
35
29
|
end
|
36
30
|
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bibsync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Mendler
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-10-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: nokogiri
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - '>='
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - '>='
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: faraday
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -81,7 +67,7 @@ dependencies:
|
|
81
67
|
- !ruby/object:Gem::Version
|
82
68
|
version: '0'
|
83
69
|
description: BibSync is a tool to synchronize scientific papers and BibTeX bibliography
|
84
|
-
files
|
70
|
+
files. It automatically downloads the metadata from dx.doi.org and arxiv.org.
|
85
71
|
email:
|
86
72
|
- mail@daniel-mendler.de
|
87
73
|
executables:
|
@@ -135,7 +121,8 @@ files:
|
|
135
121
|
- test/test_entry.rb
|
136
122
|
- test/test_utils.rb
|
137
123
|
homepage: https://github.com/minad/bibsync
|
138
|
-
licenses:
|
124
|
+
licenses:
|
125
|
+
- MIT
|
139
126
|
metadata: {}
|
140
127
|
post_install_message:
|
141
128
|
rdoc_options: []
|