bibsync 0.0.5 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -1
- data/.travis.yml +3 -1
- data/README.md +17 -1
- data/bibsync.gemspec +2 -2
- data/lib/bibsync.rb +1 -1
- data/lib/bibsync/actions/check_arxiv_versions.rb +2 -2
- data/lib/bibsync/actions/determine_arxiv_doi.rb +3 -4
- data/lib/bibsync/actions/fetch_from_arxiv.rb +3 -3
- data/lib/bibsync/actions/find_my_citations.rb +0 -2
- data/lib/bibsync/actions/synchronize_files.rb +0 -2
- data/lib/bibsync/actions/synchronize_metadata.rb +42 -25
- data/lib/bibsync/bibliography.rb +7 -2
- data/lib/bibsync/command.rb +4 -3
- data/lib/bibsync/utils.rb +1 -7
- data/lib/bibsync/version.rb +1 -1
- data/test/test_utils.rb +1 -7
- metadata +5 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5578a98ae327ca6b8ee4b5fb0da4b2bbd1786116
|
4
|
+
data.tar.gz: eb56b9ab2ce3d9f1b6599da6a1b4ca0c76db220e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3050a0ec740223617d6f700bca7725443e3ab2b1a1cce7ef46c6091ed777d18b3bdc3f6f9ad17abcce3b40f923959069960d977128c112cfbe84e933a3bcfbcd
|
7
|
+
data.tar.gz: d4692dcd5ce86c2cfaf6698a7c77ca5450487067077ba87804325b54cf77a2e7654ae025914819476be79f649e56d21e1c5e056402e46dc02a5d8a3113ed0a57
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -27,11 +27,21 @@ BibSync supports the following features:
|
|
27
27
|
* Downloading of new versions of [arXiv](http://arxiv.org/) papers
|
28
28
|
* Simple validation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) files (Checks for missing fields etc)
|
29
29
|
* Simple transformation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) fields (Normalization of author, year and journal field...)
|
30
|
-
* Works under every platform supporting Ruby (Linux, Windows, ...)
|
30
|
+
* Works under every platform supporting Ruby and `pdftotext` (Linux, Windows, ...)
|
31
31
|
|
32
32
|
Quick start
|
33
33
|
-----------
|
34
34
|
|
35
|
+
At first you have to ensure that you have the `pdftotext` program available on your `$PATH`. Under Debian you can install
|
36
|
+
the package using `apt-get` as follows
|
37
|
+
|
38
|
+
~~~
|
39
|
+
$ apt-get install poppler-utils
|
40
|
+
$ pdftotext
|
41
|
+
pdftotext version 0.24.1
|
42
|
+
...
|
43
|
+
~~~
|
44
|
+
|
35
45
|
BibSync requires Ruby >= 1.9.2 to run. It is distributed as a RubyGems package. You can install it via
|
36
46
|
the command line
|
37
47
|
|
@@ -39,6 +49,12 @@ the command line
|
|
39
49
|
$ gem install bibsync
|
40
50
|
~~~
|
41
51
|
|
52
|
+
And for updating, you write
|
53
|
+
|
54
|
+
~~~
|
55
|
+
$ gem update bibsync
|
56
|
+
~~~
|
57
|
+
|
42
58
|
After that you can use the 'bibsync' tool on the command line. At first let's validate
|
43
59
|
a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file called 'thesis.bib'.
|
44
60
|
|
data/bibsync.gemspec
CHANGED
@@ -9,15 +9,15 @@ Gem::Specification.new do |s|
|
|
9
9
|
s.authors = ['Daniel Mendler']
|
10
10
|
s.email = ['mail@daniel-mendler.de']
|
11
11
|
s.summary = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
|
12
|
-
s.description = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
|
12
|
+
s.description = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files. It automatically downloads the metadata from dx.doi.org and arxiv.org.'
|
13
13
|
s.homepage = 'https://github.com/minad/bibsync'
|
14
14
|
s.rubyforge_project = s.name
|
15
|
+
s.license = 'MIT'
|
15
16
|
|
16
17
|
s.files = `git ls-files`.split("\n")
|
17
18
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
18
19
|
s.require_paths = %w(lib)
|
19
20
|
|
20
|
-
s.add_runtime_dependency('nokogiri')
|
21
21
|
s.add_runtime_dependency('faraday')
|
22
22
|
s.add_runtime_dependency('faraday_middleware')
|
23
23
|
s.add_development_dependency('rake')
|
data/lib/bibsync.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
require 'nokogiri'
|
2
1
|
require 'faraday'
|
3
2
|
require 'faraday_middleware'
|
4
3
|
require 'shellwords'
|
5
4
|
require 'date'
|
6
5
|
require 'pathname'
|
7
6
|
require 'forwardable'
|
7
|
+
require 'rexml/document'
|
8
8
|
require 'bibsync/version'
|
9
9
|
require 'bibsync/utils'
|
10
10
|
require 'bibsync/log'
|
@@ -17,8 +17,8 @@ module BibSync
|
|
17
17
|
@bib.select {|e| e[:arxiv] }.each_slice(SliceSize) do |entry|
|
18
18
|
begin
|
19
19
|
xml = fetch_xml('http://export.arxiv.org/api/query', id_list: entry.map{|e| arxiv_id(e, version: false, prefix: true) }.join(','), max_results: SliceSize)
|
20
|
-
xml.
|
21
|
-
id.gsub
|
20
|
+
xml.get_elements('//entry').each_with_index do |e, i|
|
21
|
+
id = e.elements['id'].text.gsub('http://arxiv.org/abs/', '')
|
22
22
|
if id != entry[i][:arxiv]
|
23
23
|
info("#{entry[i][:arxiv]} replaced by http://arxiv.org/pdf/#{id}", key: entry[i])
|
24
24
|
arxiv_download(@dir, id) if @update
|
@@ -18,8 +18,6 @@ module BibSync
|
|
18
18
|
(!@force && entry[:title] && entry[:author] && entry[:year])
|
19
19
|
|
20
20
|
determine_arxiv_and_doi(entry)
|
21
|
-
|
22
|
-
@bib.save
|
23
21
|
end
|
24
22
|
end
|
25
23
|
|
@@ -49,8 +47,9 @@ module BibSync
|
|
49
47
|
begin
|
50
48
|
info('Fetch missing arXiv identifier', key: entry)
|
51
49
|
xml = fetch_xml('http://export.arxiv.org/api/query', search_query: "doi:#{entry[:doi]}", max_results: 1)
|
52
|
-
|
53
|
-
|
50
|
+
doi = xml.elements['//arxiv:doi']
|
51
|
+
if doi && doi.text == entry[:doi]
|
52
|
+
id = xml.elements['//entry/id'].text
|
54
53
|
if id =~ %r{\Ahttp://arxiv.org/abs/(.+)\Z}
|
55
54
|
entry[:arxiv] = $1
|
56
55
|
end
|
@@ -18,7 +18,7 @@ module BibSync
|
|
18
18
|
@fetch.each do |url|
|
19
19
|
if url =~ /\A(\d+\.\d+)(v\d+)?\Z/
|
20
20
|
arxivs << $1
|
21
|
-
elsif url =~ %r{\Ahttp://arxiv.org/abs/(\d+\.\d+)\Z}
|
21
|
+
elsif url =~ %r{\Ahttp://arxiv.org/abs/(\d+\.\d+)(v\d+)?\Z}
|
22
22
|
arxivs << $1
|
23
23
|
else
|
24
24
|
urls << url
|
@@ -38,8 +38,8 @@ module BibSync
|
|
38
38
|
arxivs.each_slice(SliceSize) do |ids|
|
39
39
|
begin
|
40
40
|
xml = fetch_xml('http://export.arxiv.org/api/query', id_list: ids.join(','), max_results: SliceSize)
|
41
|
-
xml.
|
42
|
-
id.gsub
|
41
|
+
xml.each_element('//entry/id') do |id|
|
42
|
+
id = id.text.gsub('http://arxiv.org/abs/', '')
|
43
43
|
info 'arXiv download', key: id
|
44
44
|
arxiv_download(@dir, id)
|
45
45
|
end
|
@@ -15,6 +15,8 @@ module BibSync
|
|
15
15
|
@bib.to_a.each do |entry|
|
16
16
|
next if entry.comment?
|
17
17
|
|
18
|
+
entry.delete(:abstract) if @force
|
19
|
+
|
18
20
|
if @force || !(entry[:title] && entry[:author] && entry[:year])
|
19
21
|
if entry[:arxiv]
|
20
22
|
if entry.key == arxiv_id(entry, prefix: false, version: true)
|
@@ -27,11 +29,12 @@ module BibSync
|
|
27
29
|
update_doi(entry) if entry[:doi]
|
28
30
|
end
|
29
31
|
|
30
|
-
if entry[:doi] =~ /\A10\.1103\// &&
|
32
|
+
if entry[:doi] =~ /\A10\.1103\// && !entry[:abstract]
|
31
33
|
update_aps_abstract(entry)
|
32
34
|
end
|
33
35
|
|
34
|
-
|
36
|
+
# Add timestamp when this entry was added
|
37
|
+
entry[:added] ||= Date.today.to_s
|
35
38
|
end
|
36
39
|
end
|
37
40
|
|
@@ -39,20 +42,34 @@ module BibSync
|
|
39
42
|
|
40
43
|
def update_aps_abstract(entry)
|
41
44
|
info("Downloading APS abstract", key: entry)
|
42
|
-
html =
|
43
|
-
|
45
|
+
html = fetch("http://link.aps.org/doi/#{entry[:doi]}")
|
46
|
+
if html =~ %r{<div class='aps-abstractbox'>(.*?)</div>}
|
47
|
+
entry[:abstract] = $1.gsub(/<[^>]+>/, '')
|
48
|
+
end
|
44
49
|
rescue => ex
|
45
50
|
error('Abstract download failed', key: entry, ex: ex)
|
46
51
|
end
|
47
52
|
|
48
53
|
def update_doi(entry)
|
49
|
-
|
50
|
-
|
54
|
+
url = "http://dx.doi.org/#{entry[:doi]}"
|
55
|
+
info("Downloading DOI metadata from #{url}", key: entry)
|
56
|
+
text = fetch(url, nil, 'Accept' => 'text/bibliography; style=bibtex')
|
51
57
|
raise text if text == 'Unknown DOI'
|
52
58
|
Entry.parse(text).each {|k, v| entry[k] = v }
|
53
59
|
rescue => ex
|
54
|
-
entry.delete(:doi)
|
55
60
|
error('DOI download failed', key: entry, ex: ex)
|
61
|
+
# dx.doi.org shows spurious 500 errors
|
62
|
+
if ex.respond_to?(:response) && ex.response[:status] == 500
|
63
|
+
tries ||= 0
|
64
|
+
tries += 1
|
65
|
+
if tries < 10
|
66
|
+
info('Retrying...', key: entry)
|
67
|
+
retry
|
68
|
+
else
|
69
|
+
error('Giving up :(', key: entry)
|
70
|
+
end
|
71
|
+
end
|
72
|
+
entry.delete(:doi)
|
56
73
|
end
|
57
74
|
|
58
75
|
# Rename arxiv file if key contains version
|
@@ -89,36 +106,36 @@ module BibSync
|
|
89
106
|
entry.file = new_path
|
90
107
|
end
|
91
108
|
|
92
|
-
@bib.save
|
93
|
-
|
94
109
|
entry
|
95
110
|
end
|
96
111
|
|
97
112
|
def update_arxiv(entry)
|
98
113
|
info('Downloading arXiv metadata', key: entry)
|
114
|
+
|
99
115
|
xml = fetch_xml('http://export.arxiv.org/oai2', verb: 'GetRecord', identifier: "oai:arXiv.org:#{arxiv_id(entry, prefix: true, version: false)}", metadataPrefix: 'arXiv')
|
100
|
-
error = xml.
|
101
|
-
raise error if error
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
entry[:
|
106
|
-
entry[:
|
107
|
-
|
116
|
+
error = xml.elements['//error']
|
117
|
+
raise error.text if error
|
118
|
+
|
119
|
+
arXiv = xml.elements['//arXiv']
|
120
|
+
|
121
|
+
entry[:title] = arXiv.elements['title'].text
|
122
|
+
entry[:abstract] = arXiv.elements['abstract'].text
|
123
|
+
entry[:arxivcategories] = arXiv.elements['categories'].text
|
124
|
+
entry[:primaryclass] = entry[:arxivcategories].split(/\s+/).first
|
125
|
+
entry[:author] = arXiv.get_elements('authors/author').map do |author|
|
126
|
+
"{#{author.elements['keyname'].text}}, {#{author.elements['forenames'].text}}"
|
108
127
|
end.join(' and ')
|
109
128
|
entry[:journal] = 'ArXiv e-prints'
|
110
129
|
entry[:eprint] = entry[:arxiv]
|
111
130
|
entry[:archiveprefix] = 'arXiv'
|
112
|
-
|
113
|
-
|
131
|
+
entry[:arxivcreated] = arXiv.elements['created'].text if arXiv.elements['created']
|
132
|
+
entry[:arxivupdated] = arXiv.elements['updated'].text if arXiv.elements['updated']
|
133
|
+
date = Date.parse(entry[:arxivupdated] || entry[:arxivcreated])
|
114
134
|
entry[:year] = date.year
|
115
135
|
entry[:month] = Literal.new(%w(jan feb mar apr may jun jul aug sep oct nov dec)[date.month - 1])
|
116
|
-
doi =
|
117
|
-
entry[:
|
118
|
-
|
119
|
-
entry[:journal] = journal if journal
|
120
|
-
comments = xml.xpath('//arXiv/comments').map(&:content).first
|
121
|
-
entry[:comments] = comments if comments
|
136
|
+
entry[:doi] = arXiv.elements['doi'].text if arXiv.elements['doi']
|
137
|
+
entry[:journal] = arXiv.elements['journal-ref'].text if arXiv.elements['journal-ref']
|
138
|
+
entry[:comments] = arXiv.elements['comments'].text if arXiv.elements['comments']
|
122
139
|
entry[:url] = "http://arxiv.org/abs/#{entry[:arxiv]}"
|
123
140
|
rescue => ex
|
124
141
|
entry.delete(:arxiv)
|
data/lib/bibsync/bibliography.rb
CHANGED
@@ -55,8 +55,13 @@ module BibSync
|
|
55
55
|
raise 'No filename given' unless @file
|
56
56
|
if @dirty
|
57
57
|
@save_hook.call(self) if @save_hook
|
58
|
-
|
59
|
-
|
58
|
+
tmpfile = "#{@file}.tmp"
|
59
|
+
begin
|
60
|
+
File.open(tmpfile, 'w') {|f| f.write(self) }
|
61
|
+
File.rename(tmpfile, @file)
|
62
|
+
ensure
|
63
|
+
File.unlink(tmpfile) rescue nil
|
64
|
+
end
|
60
65
|
@dirty = false
|
61
66
|
true
|
62
67
|
else
|
data/lib/bibsync/command.rb
CHANGED
@@ -83,14 +83,15 @@ module BibSync
|
|
83
83
|
|
84
84
|
def process
|
85
85
|
if @args.size != 0
|
86
|
-
|
86
|
+
puts 'Too many arguments'
|
87
87
|
puts @opts
|
88
88
|
exit
|
89
89
|
end
|
90
90
|
|
91
91
|
if @options[:bib]
|
92
|
-
@options[:bib] = Bibliography.new(@options[:bib])
|
93
|
-
|
92
|
+
bib = @options[:bib] = Bibliography.new(@options[:bib])
|
93
|
+
bib.save_hook = Transformer.new
|
94
|
+
at_exit { bib.save }
|
94
95
|
end
|
95
96
|
|
96
97
|
actions = []
|
data/lib/bibsync/utils.rb
CHANGED
@@ -27,13 +27,7 @@ module BibSync
|
|
27
27
|
end
|
28
28
|
|
29
29
|
def fetch_xml(url, params = nil, headers = nil)
|
30
|
-
|
31
|
-
xml.remove_namespaces!
|
32
|
-
xml
|
33
|
-
end
|
34
|
-
|
35
|
-
def fetch_html(url, params = nil, headers = nil)
|
36
|
-
Nokogiri::HTML(fetch(url, params, headers))
|
30
|
+
REXML::Document.new(fetch(url, params, headers)).root
|
37
31
|
end
|
38
32
|
|
39
33
|
def arxiv_id(arxiv, opts = {})
|
data/lib/bibsync/version.rb
CHANGED
data/test/test_utils.rb
CHANGED
@@ -24,13 +24,7 @@ describe BibSync::Utils do
|
|
24
24
|
|
25
25
|
describe '#fetch_xml' do
|
26
26
|
it 'fetches xml' do
|
27
|
-
fetch_xml('http://export.arxiv.org/oai2', verb: 'GetRecord', identifier: 'oai:arXiv.org:1208.2881', metadataPrefix: 'arXiv').must_be_instance_of
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
|
-
describe '#fetch_html' do
|
32
|
-
it 'fetches html' do
|
33
|
-
fetch_html('http://google.com').must_be_instance_of Nokogiri::HTML::Document
|
27
|
+
fetch_xml('http://export.arxiv.org/oai2', verb: 'GetRecord', identifier: 'oai:arXiv.org:1208.2881', metadataPrefix: 'arXiv').must_be_instance_of REXML::Element
|
34
28
|
end
|
35
29
|
end
|
36
30
|
|
metadata
CHANGED
@@ -1,29 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bibsync
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Daniel Mendler
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-10-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
-
- !ruby/object:Gem::Dependency
|
14
|
-
name: nokogiri
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - '>='
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
|
-
type: :runtime
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - '>='
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
13
|
- !ruby/object:Gem::Dependency
|
28
14
|
name: faraday
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
@@ -81,7 +67,7 @@ dependencies:
|
|
81
67
|
- !ruby/object:Gem::Version
|
82
68
|
version: '0'
|
83
69
|
description: BibSync is a tool to synchronize scientific papers and BibTeX bibliography
|
84
|
-
files
|
70
|
+
files. It automatically downloads the metadata from dx.doi.org and arxiv.org.
|
85
71
|
email:
|
86
72
|
- mail@daniel-mendler.de
|
87
73
|
executables:
|
@@ -135,7 +121,8 @@ files:
|
|
135
121
|
- test/test_entry.rb
|
136
122
|
- test/test_utils.rb
|
137
123
|
homepage: https://github.com/minad/bibsync
|
138
|
-
licenses: []
|
124
|
+
licenses:
|
125
|
+
- MIT
|
139
126
|
metadata: {}
|
140
127
|
post_install_message:
|
141
128
|
rdoc_options: []
|