bibsync 0.0.5 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 656b20418877066a48f7e30f3f702b47e71c7a5d
4
- data.tar.gz: af9f5d4f088255dccc86bd12655a009830050ac3
3
+ metadata.gz: 5578a98ae327ca6b8ee4b5fb0da4b2bbd1786116
4
+ data.tar.gz: eb56b9ab2ce3d9f1b6599da6a1b4ca0c76db220e
5
5
  SHA512:
6
- metadata.gz: 55e6153fa4ffa968cbf1f08dba256c2a0c3b427f6a84babc16d99e62bb519e2450335191dd6ce0c4bc8eac1a830d1ba27999215a517b35feb24f4eda2ed92cd5
7
- data.tar.gz: 4f9a2308012649be07f5bf3c8549c1a9ae9a1c0410e8c1f33de47e7991dcd1f067e83ea3e96a5c67e518a7b67980ad358bfa24f641a9f6f12740bff76da35449
6
+ metadata.gz: 3050a0ec740223617d6f700bca7725443e3ab2b1a1cce7ef46c6091ed777d18b3bdc3f6f9ad17abcce3b40f923959069960d977128c112cfbe84e933a3bcfbcd
7
+ data.tar.gz: d4692dcd5ce86c2cfaf6698a7c77ca5450487067077ba87804325b54cf77a2e7654ae025914819476be79f649e56d21e1c5e056402e46dc02a5d8a3113ed0a57
data/.gitignore CHANGED
@@ -1,6 +1,7 @@
1
1
  *.swp
2
2
  *.gem
3
+ .#*
3
4
  Gemfile.lock
4
5
  .bundle
5
6
  .yardoc
6
-
7
+ test/tmp
@@ -8,4 +8,6 @@ rvm:
8
8
  before_install:
9
9
  - sudo apt-get update -qq
10
10
  - sudo apt-get install -qq poppler-utils jabref
11
-
11
+ matrix:
12
+ allow_failures:
13
+ - rvm: ruby-head
data/README.md CHANGED
@@ -27,11 +27,21 @@ BibSync supports the following features:
27
27
  * Downloading of new versions of [arXiv](http://arxiv.org/) papers
28
28
  * Simple validation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) files (Checks for missing fields etc)
29
29
  * Simple transformation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) fields (Normalization of author, year and journal field...)
30
- * Works under every platform supporting Ruby (Linux, Windows, ...)
30
+ * Works on every platform supporting Ruby and `pdftotext` (Linux, Windows, ...)
31
31
 
32
32
  Quick start
33
33
  -----------
34
34
 
35
+ First, make sure that the `pdftotext` program is available on your `$PATH`. Under Debian you can install
36
+ the `poppler-utils` package using `apt-get` as follows:
37
+
38
+ ~~~
39
+ $ apt-get install poppler-utils
40
+ $ pdftotext
41
+ pdftotext version 0.24.1
42
+ ...
43
+ ~~~
44
+
35
45
  BibSync requires Ruby >= 1.9.2 to run. It is distributed as a RubyGems package. You can install it via
36
46
  the command line
37
47
 
@@ -39,6 +49,12 @@ the command line
39
49
  $ gem install bibsync
40
50
  ~~~
41
51
 
52
+ To update to a newer version later, run
53
+
54
+ ~~~
55
+ $ gem update bibsync
56
+ ~~~
57
+
42
58
  After that you can use the 'bibsync' tool on the command line. First, let's validate
43
59
  a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file called 'thesis.bib'.
44
60
 
@@ -9,15 +9,15 @@ Gem::Specification.new do |s|
9
9
  s.authors = ['Daniel Mendler']
10
10
  s.email = ['mail@daniel-mendler.de']
11
11
  s.summary = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
12
- s.description = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
12
+ s.description = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files. It automatically downloads the metadata from dx.doi.org and arxiv.org.'
13
13
  s.homepage = 'https://github.com/minad/bibsync'
14
14
  s.rubyforge_project = s.name
15
+ s.license = 'MIT'
15
16
 
16
17
  s.files = `git ls-files`.split("\n")
17
18
  s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
18
19
  s.require_paths = %w(lib)
19
20
 
20
- s.add_runtime_dependency('nokogiri')
21
21
  s.add_runtime_dependency('faraday')
22
22
  s.add_runtime_dependency('faraday_middleware')
23
23
  s.add_development_dependency('rake')
@@ -1,10 +1,10 @@
1
- require 'nokogiri'
2
1
  require 'faraday'
3
2
  require 'faraday_middleware'
4
3
  require 'shellwords'
5
4
  require 'date'
6
5
  require 'pathname'
7
6
  require 'forwardable'
7
+ require 'rexml/document'
8
8
  require 'bibsync/version'
9
9
  require 'bibsync/utils'
10
10
  require 'bibsync/log'
@@ -17,8 +17,8 @@ module BibSync
17
17
  @bib.select {|e| e[:arxiv] }.each_slice(SliceSize) do |entry|
18
18
  begin
19
19
  xml = fetch_xml('http://export.arxiv.org/api/query', id_list: entry.map{|e| arxiv_id(e, version: false, prefix: true) }.join(','), max_results: SliceSize)
20
- xml.xpath('//entry/id').map(&:content).each_with_index do |id, i|
21
- id.gsub!('http://arxiv.org/abs/', '')
20
+ xml.get_elements('//entry').each_with_index do |e, i|
21
+ id = e.elements['id'].text.gsub('http://arxiv.org/abs/', '')
22
22
  if id != entry[i][:arxiv]
23
23
  info("#{entry[i][:arxiv]} replaced by http://arxiv.org/pdf/#{id}", key: entry[i])
24
24
  arxiv_download(@dir, id) if @update
@@ -18,8 +18,6 @@ module BibSync
18
18
  (!@force && entry[:title] && entry[:author] && entry[:year])
19
19
 
20
20
  determine_arxiv_and_doi(entry)
21
-
22
- @bib.save
23
21
  end
24
22
  end
25
23
 
@@ -49,8 +47,9 @@ module BibSync
49
47
  begin
50
48
  info('Fetch missing arXiv identifier', key: entry)
51
49
  xml = fetch_xml('http://export.arxiv.org/api/query', search_query: "doi:#{entry[:doi]}", max_results: 1)
52
- if xml.xpath('//entry/doi').map(&:content).first == entry[:doi]
53
- id = xml.xpath('//entry/id').map(&:content).first
50
+ doi = xml.elements['//arxiv:doi']
51
+ if doi && doi.text == entry[:doi]
52
+ id = xml.elements['//entry/id'].text
54
53
  if id =~ %r{\Ahttp://arxiv.org/abs/(.+)\Z}
55
54
  entry[:arxiv] = $1
56
55
  end
@@ -18,7 +18,7 @@ module BibSync
18
18
  @fetch.each do |url|
19
19
  if url =~ /\A(\d+\.\d+)(v\d+)?\Z/
20
20
  arxivs << $1
21
- elsif url =~ %r{\Ahttp://arxiv.org/abs/(\d+\.\d+)\Z}
21
+ elsif url =~ %r{\Ahttp://arxiv.org/abs/(\d+\.\d+)(v\d+)?\Z}
22
22
  arxivs << $1
23
23
  else
24
24
  urls << url
@@ -38,8 +38,8 @@ module BibSync
38
38
  arxivs.each_slice(SliceSize) do |ids|
39
39
  begin
40
40
  xml = fetch_xml('http://export.arxiv.org/api/query', id_list: ids.join(','), max_results: SliceSize)
41
- xml.xpath('//entry/id').map(&:content).each_with_index do |id, i|
42
- id.gsub!('http://arxiv.org/abs/', '')
41
+ xml.each_element('//entry/id') do |id|
42
+ id = id.text.gsub('http://arxiv.org/abs/', '')
43
43
  info 'arXiv download', key: id
44
44
  arxiv_download(@dir, id)
45
45
  end
@@ -38,8 +38,6 @@ module BibSync
38
38
  warning("Cited in #{files} but not found in #{@bib.file}", key: key)
39
39
  end
40
40
  end
41
-
42
- @bib.save
43
41
  end
44
42
  end
45
43
  end
@@ -32,8 +32,6 @@ module BibSync
32
32
  entry.type ||= :ARTICLE
33
33
  entry.file = file
34
34
  end
35
-
36
- @bib.save
37
35
  end
38
36
  end
39
37
  end
@@ -15,6 +15,8 @@ module BibSync
15
15
  @bib.to_a.each do |entry|
16
16
  next if entry.comment?
17
17
 
18
+ entry.delete(:abstract) if @force
19
+
18
20
  if @force || !(entry[:title] && entry[:author] && entry[:year])
19
21
  if entry[:arxiv]
20
22
  if entry.key == arxiv_id(entry, prefix: false, version: true)
@@ -27,11 +29,12 @@ module BibSync
27
29
  update_doi(entry) if entry[:doi]
28
30
  end
29
31
 
30
- if entry[:doi] =~ /\A10\.1103\// && (@force || !entry[:abstract])
32
+ if entry[:doi] =~ /\A10\.1103\// && !entry[:abstract]
31
33
  update_aps_abstract(entry)
32
34
  end
33
35
 
34
- @bib.save
36
+ # Add timestamp when this entry was added
37
+ entry[:added] ||= Date.today.to_s
35
38
  end
36
39
  end
37
40
 
@@ -39,20 +42,34 @@ module BibSync
39
42
 
40
43
  def update_aps_abstract(entry)
41
44
  info("Downloading APS abstract", key: entry)
42
- html = fetch_html("http://link.aps.org/doi/#{entry[:doi]}")
43
- entry[:abstract] = html.css('.aps-abstractbox').map(&:content).first
45
+ html = fetch("http://link.aps.org/doi/#{entry[:doi]}")
46
+ if html =~ %r{<div class='aps-abstractbox'>(.*?)</div>}
47
+ entry[:abstract] = $1.gsub(/<[^>]+>/, '')
48
+ end
44
49
  rescue => ex
45
50
  error('Abstract download failed', key: entry, ex: ex)
46
51
  end
47
52
 
48
53
  def update_doi(entry)
49
- info('Downloading DOI metadata', key: entry)
50
- text = fetch("http://dx.doi.org/#{entry[:doi]}", nil, 'Accept' => 'text/bibliography; style=bibtex')
54
+ url = "http://dx.doi.org/#{entry[:doi]}"
55
+ info("Downloading DOI metadata from #{url}", key: entry)
56
+ text = fetch(url, nil, 'Accept' => 'text/bibliography; style=bibtex')
51
57
  raise text if text == 'Unknown DOI'
52
58
  Entry.parse(text).each {|k, v| entry[k] = v }
53
59
  rescue => ex
54
- entry.delete(:doi)
55
60
  error('DOI download failed', key: entry, ex: ex)
61
+ # dx.doi.org shows spurious 500 errors
62
+ if ex.respond_to?(:response) && ex.response[:status] == 500
63
+ tries ||= 0
64
+ tries += 1
65
+ if tries < 10
66
+ info('Retrying...', key: entry)
67
+ retry
68
+ else
69
+ error('Giving up :(', key: entry)
70
+ end
71
+ end
72
+ entry.delete(:doi)
56
73
  end
57
74
 
58
75
  # Rename arxiv file if key contains version
@@ -89,36 +106,36 @@ module BibSync
89
106
  entry.file = new_path
90
107
  end
91
108
 
92
- @bib.save
93
-
94
109
  entry
95
110
  end
96
111
 
97
112
  def update_arxiv(entry)
98
113
  info('Downloading arXiv metadata', key: entry)
114
+
99
115
  xml = fetch_xml('http://export.arxiv.org/oai2', verb: 'GetRecord', identifier: "oai:arXiv.org:#{arxiv_id(entry, prefix: true, version: false)}", metadataPrefix: 'arXiv')
100
- error = xml.xpath('//error').map(&:content).first
101
- raise error if error
102
-
103
- entry[:title] = xml.xpath('//arXiv/title').map(&:content).first
104
- entry[:abstract] = xml.xpath('//arXiv/abstract').map(&:content).first
105
- entry[:primaryclass] = xml.xpath('//arXiv/categories').map(&:content).first.split(/\s+/).first
106
- entry[:author] = xml.xpath('//arXiv/authors/author').map do |author|
107
- "{#{author.xpath('keyname').map(&:content).first}}, {#{author.xpath('forenames').map(&:content).first}}"
116
+ error = xml.elements['//error']
117
+ raise error.text if error
118
+
119
+ arXiv = xml.elements['//arXiv']
120
+
121
+ entry[:title] = arXiv.elements['title'].text
122
+ entry[:abstract] = arXiv.elements['abstract'].text
123
+ entry[:arxivcategories] = arXiv.elements['categories'].text
124
+ entry[:primaryclass] = entry[:arxivcategories].split(/\s+/).first
125
+ entry[:author] = arXiv.get_elements('authors/author').map do |author|
126
+ "{#{author.elements['keyname'].text}}, {#{author.elements['forenames'].text}}"
108
127
  end.join(' and ')
109
128
  entry[:journal] = 'ArXiv e-prints'
110
129
  entry[:eprint] = entry[:arxiv]
111
130
  entry[:archiveprefix] = 'arXiv'
112
- date = xml.xpath('//arXiv/updated').map(&:content).first || xml.xpath('//arXiv/created').map(&:content).first
113
- date = Date.parse(date)
131
+ entry[:arxivcreated] = arXiv.elements['created'].text if arXiv.elements['created']
132
+ entry[:arxivupdated] = arXiv.elements['updated'].text if arXiv.elements['updated']
133
+ date = Date.parse(entry[:arxivupdated] || entry[:arxivcreated])
114
134
  entry[:year] = date.year
115
135
  entry[:month] = Literal.new(%w(jan feb mar apr may jun jul aug sep oct nov dec)[date.month - 1])
116
- doi = xml.xpath('//arXiv/doi').map(&:content).first
117
- entry[:doi] = doi if doi
118
- journal = xml.xpath('//arXiv/journal-ref').map(&:content).first
119
- entry[:journal] = journal if journal
120
- comments = xml.xpath('//arXiv/comments').map(&:content).first
121
- entry[:comments] = comments if comments
136
+ entry[:doi] = arXiv.elements['doi'].text if arXiv.elements['doi']
137
+ entry[:journal] = arXiv.elements['journal-ref'].text if arXiv.elements['journal-ref']
138
+ entry[:comments] = arXiv.elements['comments'].text if arXiv.elements['comments']
122
139
  entry[:url] = "http://arxiv.org/abs/#{entry[:arxiv]}"
123
140
  rescue => ex
124
141
  entry.delete(:arxiv)
@@ -55,8 +55,13 @@ module BibSync
55
55
  raise 'No filename given' unless @file
56
56
  if @dirty
57
57
  @save_hook.call(self) if @save_hook
58
- File.open("#{@file}.tmp", 'w') {|f| f.write(self) }
59
- File.rename("#{@file}.tmp", @file)
58
+ tmpfile = "#{@file}.tmp"
59
+ begin
60
+ File.open(tmpfile, 'w') {|f| f.write(self) }
61
+ File.rename(tmpfile, @file)
62
+ ensure
63
+ File.unlink(tmpfile) rescue nil
64
+ end
60
65
  @dirty = false
61
66
  true
62
67
  else
@@ -83,14 +83,15 @@ module BibSync
83
83
 
84
84
  def process
85
85
  if @args.size != 0
86
- error 'Too many arguments'
86
+ puts 'Too many arguments'
87
87
  puts @opts
88
88
  exit
89
89
  end
90
90
 
91
91
  if @options[:bib]
92
- @options[:bib] = Bibliography.new(@options[:bib])
93
- @options[:bib].save_hook = Transformer.new
92
+ bib = @options[:bib] = Bibliography.new(@options[:bib])
93
+ bib.save_hook = Transformer.new
94
+ at_exit { bib.save }
94
95
  end
95
96
 
96
97
  actions = []
@@ -27,13 +27,7 @@ module BibSync
27
27
  end
28
28
 
29
29
  def fetch_xml(url, params = nil, headers = nil)
30
- xml = Nokogiri::XML(fetch(url, params, headers))
31
- xml.remove_namespaces!
32
- xml
33
- end
34
-
35
- def fetch_html(url, params = nil, headers = nil)
36
- Nokogiri::HTML(fetch(url, params, headers))
30
+ REXML::Document.new(fetch(url, params, headers)).root
37
31
  end
38
32
 
39
33
  def arxiv_id(arxiv, opts = {})
@@ -1,3 +1,3 @@
1
1
  module BibSync
2
- VERSION = '0.0.5'
2
+ VERSION = '0.0.8'
3
3
  end
@@ -24,13 +24,7 @@ describe BibSync::Utils do
24
24
 
25
25
  describe '#fetch_xml' do
26
26
  it 'fetches xml' do
27
- fetch_xml('http://export.arxiv.org/oai2', verb: 'GetRecord', identifier: 'oai:arXiv.org:1208.2881', metadataPrefix: 'arXiv').must_be_instance_of Nokogiri::XML::Document
28
- end
29
- end
30
-
31
- describe '#fetch_html' do
32
- it 'fetches html' do
33
- fetch_html('http://google.com').must_be_instance_of Nokogiri::HTML::Document
27
+ fetch_xml('http://export.arxiv.org/oai2', verb: 'GetRecord', identifier: 'oai:arXiv.org:1208.2881', metadataPrefix: 'arXiv').must_be_instance_of REXML::Element
34
28
  end
35
29
  end
36
30
 
metadata CHANGED
@@ -1,29 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bibsync
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel Mendler
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-07-04 00:00:00.000000000 Z
11
+ date: 2013-10-08 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: nokogiri
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - '>='
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - '>='
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
13
  - !ruby/object:Gem::Dependency
28
14
  name: faraday
29
15
  requirement: !ruby/object:Gem::Requirement
@@ -81,7 +67,7 @@ dependencies:
81
67
  - !ruby/object:Gem::Version
82
68
  version: '0'
83
69
  description: BibSync is a tool to synchronize scientific papers and BibTeX bibliography
84
- files
70
+ files. It automatically downloads the metadata from dx.doi.org and arxiv.org.
85
71
  email:
86
72
  - mail@daniel-mendler.de
87
73
  executables:
@@ -135,7 +121,8 @@ files:
135
121
  - test/test_entry.rb
136
122
  - test/test_utils.rb
137
123
  homepage: https://github.com/minad/bibsync
138
- licenses: []
124
+ licenses:
125
+ - MIT
139
126
  metadata: {}
140
127
  post_install_message:
141
128
  rdoc_options: []