bibsync 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +10 -0
  3. data/Gemfile +3 -0
  4. data/LICENSE +21 -0
  5. data/README.md +88 -0
  6. data/Rakefile +16 -0
  7. data/bibsync.gemspec +4 -2
  8. data/lib/bibsync/actions/{check_versions.rb → check_arxiv_versions.rb} +6 -6
  9. data/lib/bibsync/actions/determine_arxiv_doi.rb +70 -0
  10. data/lib/bibsync/actions/fetch_from_arxiv.rb +11 -9
  11. data/lib/bibsync/actions/find_my_citations.rb +4 -4
  12. data/lib/bibsync/actions/jabref_format.rb +2 -2
  13. data/lib/bibsync/actions/synchronize_files.rb +5 -6
  14. data/lib/bibsync/actions/synchronize_metadata.rb +14 -57
  15. data/lib/bibsync/actions/validate.rb +16 -6
  16. data/lib/bibsync/actions.rb +1 -7
  17. data/lib/bibsync/bibliography.rb +60 -23
  18. data/lib/bibsync/command.rb +13 -8
  19. data/lib/bibsync/log.rb +22 -20
  20. data/lib/bibsync/transformer.rb +1 -1
  21. data/lib/bibsync/utils.rb +7 -9
  22. data/lib/bibsync/version.rb +1 -1
  23. data/test/actions/test_check_arxiv_versions.rb +4 -0
  24. data/test/actions/test_determine_arxiv_doi.rb +61 -0
  25. data/test/actions/test_fetch_from_arxiv.rb +4 -0
  26. data/test/actions/test_find_my_citations.rb +4 -0
  27. data/test/actions/test_jabref_format.rb +4 -0
  28. data/test/actions/test_synchronize_files.rb +4 -0
  29. data/test/actions/test_synchronize_metadata.rb +34 -0
  30. data/test/actions/test_validate.rb +4 -0
  31. data/test/fixture/FileWithEmbeddedArXiv.pdf +0 -0
  32. data/test/fixture/FileWithEmbeddedArXiv.tex +7 -0
  33. data/test/fixture/FileWithEmbeddedDOI.pdf +0 -0
  34. data/test/fixture/FileWithEmbeddedDOI.tex +7 -0
  35. data/test/fixture/entry.bib +8 -0
  36. data/test/fixture/test.bib +34 -0
  37. data/test/helper.rb +21 -0
  38. data/test/test_bibliography.rb +222 -0
  39. data/test/test_utils.rb +54 -0
  40. metadata +63 -16
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 21c0564e45a66b0339bf5b7bafaef25633adcd8e
4
+ data.tar.gz: 34a9ae41d395ba912e95accbb0c2cb87c8de69c3
5
+ SHA512:
6
+ metadata.gz: 46e87958376899e94b3bec951241bb02e3085f2e5141146477d9f8bb0de533e27c0c8d07b4c4e5a314f2d734cd386ee092362d38307e18a4d49303a746ad42c0
7
+ data.tar.gz: 46b15a8cf96461ce3fa3e45aef1585e6806177be668a20410ca940687c5d1f5667d30b1e76701b37503984c0a5ca0aa23025728b0154ee306c868ba8bcdaf1db
data/.travis.yml ADDED
@@ -0,0 +1,10 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - 2.0.0
5
+ - ruby-head
6
+ - jruby-19mode
7
+ - rbx-19mode
8
+ before_install:
9
+ - sudo apt-get install -qq poppler-utils
10
+
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
3
+
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2013 Daniel Mendler
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,88 @@
1
+ BibSync
2
+ =======
3
+
4
+ BibSync is a tool to synchronize your paper database with a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file which might be most
5
+ useful for Physicists and Mathematicians since it supports synchronization with [DOI](http://dx.doi.org/) and [arXiv](http://arxiv.org/).
6
+
7
+ I created this tool during the work on my diploma thesis in physics since I was unhappy
8
+ with existing tools like [Mendeley](http://www.mendeley.com/). I use this tool together with Git for version control
9
+ and [JabRef](http://jabref.sourceforge.net/) for browsing. This tool adheres more to the Unix philosophy that a small tool
10
+ for each task is better than one thing which tries to solve everything. If you use [JabRef](http://jabref.sourceforge.net/)
11
+ for browsing and tagging it is unnecessary to sort the papers into different sub directories by hand.
12
+ Just throw them all in one directory!
13
+
14
+ __Note__: This tool is derived from a script which I used during my thesis. It worked
15
+ quite well and reliably during that time. But be aware that I used Git for version control
16
+ of the [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file. So any mistakes which might be made by this tool could be reverted.
17
+
18
+ Features
19
+ --------
20
+
21
+ BibSync supports the following features:
22
+
23
+ * Synchronization between a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file and a directory containing the papers in pdf, ps or djvu format
24
+ * [JabRef](http://jabref.sourceforge.net/) file fields are generated, so you can open the existing papers directly out of [JabRef](http://jabref.sourceforge.net/)
25
+ * Downloading of [arXiv](http://arxiv.org/) or [DOI](http://dx.doi.org/) metadata
26
+ * Extraction of [arXiv](http://arxiv.org/) or [DOI](http://dx.doi.org/) id out of the file using [pdftotext](http://en.wikipedia.org/wiki/Pdftotext)
27
+ * Downloading of new versions of [arXiv](http://arxiv.org/) papers
28
+ * Simple validation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) files (Checks for missing fields etc)
29
+ * Simple transformation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) fields (Normalization of author, year and journal field...)
30
+ * Works on every platform supporting Ruby (Linux, Windows, ...)
31
+
32
+ Quick start
33
+ -----------
34
+
35
+ BibSync requires Ruby >= 1.9.2 to run. It is distributed as a RubyGems package. You can install it via
36
+ the command line
37
+
38
+ ~~~
39
+ $ gem install bibsync
40
+ ~~~
41
+
42
+ After that you can use the 'bibsync' tool on the command line. First, let's validate
43
+ a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file called 'thesis.bib'.
44
+
45
+ ~~~
46
+ $ bibsync -b ~/thesis/thesis.bib
47
+ ~~~
48
+
49
+ Then we want to synchronize all the papers in our paper directory with 'bibsync' and automatically download
50
+ the missing metadata.
51
+
52
+ ~~~
53
+ $ bibsync -d ~/thesis/papers -b ~/thesis/thesis.bib
54
+ ~~~
55
+
56
+ BibSync tries to download the metadata from [arxiv.org](http://arxiv.org) and [dx.doi.org](http://dx.doi.org). If you want to know more about the functions of 'bibsync' take a look at the command line help.
57
+
58
+ ~~~
59
+ $ bibsync --help
60
+ ~~~
61
+
62
+ My setup
63
+ --------
64
+
65
+ * BibSync for synchronizing
66
+ * [JabRef](http://jabref.sourceforge.net/) for browsing the bibliography, tagging and categorizing papers
67
+ * [Biblatex](http://www.ctan.org/pkg/biblatex) to include a bibliography in LaTeX with full Unicode support
68
+
69
+ Alternatives
70
+ ------------
71
+
72
+ * [Mendeley](http://www.mendeley.com/) (Commercial, synchronizes with their server, limited disk space, bloated gui application)
73
+ * [Zotero](http://www.zotero.org/) (Firefox plugin, Open source)
74
+
75
+ A better name?
76
+ --------------
77
+
78
+ If you have a suggestion for a better name, just let me know...
79
+
80
+ Author
81
+ ------
82
+
83
+ Daniel Mendler
84
+
85
+ License
86
+ -------
87
+
88
+ See LICENSE
data/Rakefile ADDED
@@ -0,0 +1,16 @@
1
+ begin
2
+ require 'bundler'
3
+ Bundler::GemHelper.install_tasks
4
+ rescue Exception
5
+ end
6
+
7
+ require 'rake/testtask'
8
+
9
+ Rake::TestTask.new :test do |t|
10
+ t.libs << 'lib' << 'test'
11
+ t.test_files = FileList['test/**/test_*.rb']
12
+ t.verbose = true
13
+ t.ruby_opts << '-w' << '-v'
14
+ end
15
+
16
+ task :default => :test
data/bibsync.gemspec CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |s|
8
8
  s.date = Date.today.to_s
9
9
  s.authors = ['Daniel Mendler']
10
10
  s.email = ['mail@daniel-mendler.de']
11
- s.summary = 'BibSync is a tool to synchronize scientific papers and bibtex bibliography files'
12
- s.description = 'BibSync is a tool to synchronize scientific papers and bibtex bibliography files'
11
+ s.summary = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
12
+ s.description = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
13
13
  s.homepage = 'https://github.com/minad/bibsync'
14
14
  s.rubyforge_project = s.name
15
15
 
@@ -18,4 +18,6 @@ Gem::Specification.new do |s|
18
18
  s.require_paths = %w(lib)
19
19
 
20
20
  s.add_runtime_dependency('nokogiri')
21
+ s.add_development_dependency('rake')
22
+ s.add_development_dependency('minitest')
21
23
  end
@@ -1,14 +1,14 @@
1
1
  module BibSync
2
2
  module Actions
3
- class CheckVersions
3
+ class CheckArXivVersions
4
4
  include Log
5
5
  include Utils
6
6
 
7
7
  SliceSize = 20
8
8
 
9
9
  def initialize(options)
10
- raise 'Bibliography must be set' unless @bib = options[:bib]
11
- raise 'Directory must be set' unless @dir = options[:dir]
10
+ raise 'Option :bib is required' unless @bib = options[:bib]
11
+ raise 'Option :dir is required' unless @dir = options[:dir]
12
12
  @update = options[:update]
13
13
  end
14
14
 
@@ -16,16 +16,16 @@ module BibSync
16
16
  notice 'Check for newer version on arXiv'
17
17
  @bib.select {|e| e[:arxiv] }.each_slice(SliceSize) do |entry|
18
18
  begin
19
- xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{entry.map{|e| arxiv_id(e, :version => false, :prefix => true) }.join(',')}&max_results=#{SliceSize}")
19
+ xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{entry.map{|e| arxiv_id(e, version: false, prefix: true) }.join(',')}&max_results=#{SliceSize}")
20
20
  xml.xpath('//entry/id').map(&:content).each_with_index do |id, i|
21
21
  id.gsub!('http://arxiv.org/abs/', '')
22
22
  if id != entry[i][:arxiv]
23
- info("#{entry[i][:arxiv]} replaced by http://arxiv.org/pdf/#{id}", :key => entry[i])
23
+ info("#{entry[i][:arxiv]} replaced by http://arxiv.org/pdf/#{id}", key: entry[i])
24
24
  arxiv_download(@dir, id) if @update
25
25
  end
26
26
  end
27
27
  rescue => ex
28
- error('arXiv query failed', :ex => ex)
28
+ error('arXiv query failed', ex: ex)
29
29
  end
30
30
  end
31
31
 
@@ -0,0 +1,70 @@
1
+ module BibSync
2
+ module Actions
3
+ class DetermineArXivDOI
4
+ include Utils
5
+ include Log
6
+
7
+ def initialize(options)
8
+ raise 'Option :bib is required' unless @bib = options[:bib]
9
+ @force = options[:resync]
10
+ end
11
+
12
+ def run
13
+ notice 'Determine arXiv and DOI identifiers'
14
+
15
+ @bib.each do |entry|
16
+ next if entry.comment? ||
17
+ (entry[:doi] && entry[:arxiv]) ||
18
+ (!@force && entry[:title] && entry[:author] && entry[:year])
19
+
20
+ determine_arxiv_and_doi(entry)
21
+
22
+ @bib.save
23
+ end
24
+ end
25
+
26
+ private
27
+
28
+ def determine_arxiv_and_doi(entry)
29
+ if file = entry.file
30
+ if file[:type] == :PDF && !entry[:arxiv] && !entry[:doi]
31
+ debug('Searching for arXiv or doi identifier in pdf file', key: entry)
32
+ text = `pdftotext -f 1 -l 2 #{Shellwords.escape file[:path]} - 2>/dev/null`
33
+ entry[:arxiv] = $1 if text =~ /arXiv:\s*([\w\.\/\-]+)/
34
+ entry[:doi] = $1 if text =~ /doi:\s*([\w\.\/\-]+)/i
35
+ end
36
+
37
+ if !entry[:arxiv] && file[:name] =~ /^(\d+.\d+v\d+)\.\w+$/
38
+ debug('Interpreting file name as arXiv identifier', key: entry)
39
+ entry[:arxiv] = $1
40
+ end
41
+
42
+ if !entry[:doi] && file[:name] =~ /^(PhysRev.*?|RevModPhys.*?)\.\w+$/
43
+ debug('Interpreting file name as doi identifier', key: entry)
44
+ entry[:doi] = "10.1103/#{$1}"
45
+ end
46
+ end
47
+
48
+ if !entry[:arxiv] && entry[:doi]
49
+ begin
50
+ info('Fetch missing arXiv identifier', key: entry)
51
+ xml = fetch_xml("http://export.arxiv.org/api/query?search_query=doi:#{entry[:doi]}&max_results=1")
52
+ if xml.xpath('//entry/doi').map(&:content).first == entry[:doi]
53
+ id = xml.xpath('//entry/id').map(&:content).first
54
+ if id =~ %r{\Ahttp://arxiv.org/abs/(.+)\Z}
55
+ entry[:arxiv] = $1
56
+ end
57
+ end
58
+ rescue => ex
59
+ error('arXiv query by DOI failed', ex: ex, key: entry)
60
+ end
61
+ end
62
+
63
+ unless entry[:arxiv] || entry[:doi]
64
+ warning('No arXiv or DOI identifier found', key: entry)
65
+ end
66
+ end
67
+
68
+ end
69
+ end
70
+ end
@@ -7,17 +7,19 @@ module BibSync
7
7
  include Utils
8
8
 
9
9
  def initialize(options)
10
- raise 'Fetch must be set' unless @fetch = options[:fetch]
11
- raise 'Directory must be set' unless @dir = options[:dir]
10
+ raise 'Option :fetch is required' unless @fetch = options[:fetch]
11
+ raise 'Option :dir is required' unless @dir = options[:dir]
12
12
  end
13
13
 
14
14
  def run
15
- ids = []
15
+ arxivs = []
16
16
  urls = []
17
17
 
18
18
  @fetch.each do |url|
19
- if url =~ %r{^http://arxiv.org/abs/(\d+\.\d+)$}
20
- ids << $1
19
+ if url =~ /\A(\d+\.\d+)(v\d+)?\Z/
20
+ arxivs << $1
21
+ elsif url =~ %r{\Ahttp://arxiv.org/abs/(\d+\.\d+)\Z}
22
+ arxivs << $1
21
23
  else
22
24
  urls << url
23
25
  end
@@ -31,18 +33,18 @@ module BibSync
31
33
  end
32
34
  end
33
35
 
34
- unless ids.empty?
36
+ unless arxivs.empty?
35
37
  notice 'Downloading from arXiv'
36
- ids.each_slice(SliceSize) do |ids|
38
+ arxivs.each_slice(SliceSize) do |ids|
37
39
  begin
38
40
  xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{ids.join(',')}&max_results=#{SliceSize}")
39
41
  xml.xpath('//entry/id').map(&:content).each_with_index do |id, i|
40
42
  id.gsub!('http://arxiv.org/abs/', '')
41
- info 'arXiv download', :key => id
43
+ info 'arXiv download', key: id
42
44
  arxiv_download(@dir, id)
43
45
  end
44
46
  rescue => ex
45
- error('arXiv query failed', :ex => ex)
47
+ error('arXiv query failed', ex: ex)
46
48
  end
47
49
  end
48
50
  end
@@ -5,8 +5,8 @@ module BibSync
5
5
  include Utils
6
6
 
7
7
  def initialize(options)
8
- raise 'Bibliography must be set' unless @bib = options[:bib]
9
- raise 'Tex directory must be set' unless @dir = options[:citedbyme]
8
+ raise 'Option :bib is required' unless @bib = options[:bib]
9
+ raise 'Option :citedbyme is required' unless @dir = options[:citedbyme]
10
10
  raise "#{@dir} is not a directory" unless File.directory?(@dir)
11
11
  end
12
12
 
@@ -19,7 +19,7 @@ module BibSync
19
19
  $1.split(/\s*,\s*/).each do |key|
20
20
  key.strip!
21
21
  file = @bib.relative_path(file)
22
- debug("Cited in #{file}", :key => key)
22
+ debug("Cited in #{file}", key: key)
23
23
  (cites[key] ||= []) << file
24
24
  end
25
25
  end
@@ -35,7 +35,7 @@ module BibSync
35
35
  if @bib[key]
36
36
  @bib[key][:citedbyme] = files
37
37
  else
38
- warning("Cited in #{files} but not found in #{@bib.file}", :key => key)
38
+ warning("Cited in #{files} but not found in #{@bib.file}", key: key)
39
39
  end
40
40
  end
41
41
 
@@ -1,11 +1,11 @@
1
1
  module BibSync
2
2
  module Actions
3
- class JabrefFormat
3
+ class JabRefFormat
4
4
  include Utils
5
5
  include Log
6
6
 
7
7
  def initialize(options)
8
- raise 'Bibliography must be set' unless @bib = options[:bib]
8
+ raise 'Option :bib is required' unless @bib = options[:bib]
9
9
  end
10
10
 
11
11
  def run
@@ -7,8 +7,8 @@ module BibSync
7
7
  FileTypes = %w(djvu pdf ps)
8
8
 
9
9
  def initialize(options)
10
- raise 'Bibliography must be set' unless @bib = options[:bib]
11
- raise 'Directory must be set' unless @dir = options[:dir]
10
+ raise 'Option :bib is required' unless @bib = options[:bib]
11
+ raise 'Option :dir is required' unless @dir = options[:dir]
12
12
  end
13
13
 
14
14
  def run
@@ -17,16 +17,15 @@ module BibSync
17
17
  files = {}
18
18
  Dir[File.join(@dir, "**/*.{#{FileTypes.join(',')}}")].sort.each do |file|
19
19
  name = File.basename(file)
20
- key, type = split_filename(name)
20
+ key = name_without_ext(name)
21
21
  raise "Duplicate file #{name}" if files[key]
22
22
  files[key] = file
23
23
  end
24
24
 
25
25
  files.each do |key, file|
26
26
  unless entry = @bib[key]
27
- info('New file', :key => key)
28
- entry = Bibliography::Entry.new
29
- entry.key = key
27
+ info('New file', key: key)
28
+ entry = Bibliography::Entry.new(key: key)
30
29
  @bib << entry
31
30
  end
32
31
 
@@ -5,7 +5,7 @@ module BibSync
5
5
  include Log
6
6
 
7
7
  def initialize(options)
8
- raise 'Bibliography must be set' unless @bib = options[:bib]
8
+ raise 'Option :bib is required' unless @bib = options[:bib]
9
9
  @force = options[:resync]
10
10
  end
11
11
 
@@ -16,10 +16,8 @@ module BibSync
16
16
  next if entry.comment?
17
17
 
18
18
  if @force || !(entry[:title] && entry[:author] && entry[:year])
19
- determine_arxiv_and_doi(entry)
20
-
21
19
  if entry[:arxiv]
22
- if entry.key == arxiv_id(entry, :prefix => false, :version => true)
20
+ if entry.key == arxiv_id(entry, prefix: false, version: true)
23
21
  entry = rename_arxiv_file(entry)
24
22
  next unless entry
25
23
  end
@@ -40,28 +38,28 @@ module BibSync
40
38
  private
41
39
 
42
40
  def update_aps_abstract(entry)
43
- info("Downloading APS abstract", :key => entry)
41
+ info("Downloading APS abstract", key: entry)
44
42
  html = fetch_html("http://link.aps.org/doi/#{entry[:doi]}")
45
43
  entry[:abstract] = html.css('.aps-abstractbox').map(&:content).first
46
44
  rescue => ex
47
- error('Abstract download failed', :key => entry, :ex => ex)
45
+ error('Abstract download failed', key: entry, ex: ex)
48
46
  end
49
47
 
50
48
  def update_doi(entry)
51
- info('Downloading doi.org metadata', :key => entry)
49
+ info('Downloading DOI metadata', key: entry)
52
50
  text = fetch("http://dx.doi.org/#{entry[:doi]}", 'Accept' => 'text/bibliography; style=bibtex')
53
51
  raise text if text == 'Unknown DOI'
54
52
  Bibliography::Entry.parse(text).each {|k, v| entry[k] = v }
55
53
  rescue => ex
56
54
  entry.delete(:doi)
57
- error('doi download failed', :key => entry, :ex => ex)
55
+ error('DOI download failed', key: entry, ex: ex)
58
56
  end
59
57
 
60
58
  # Rename arxiv file if key contains version
61
59
  def rename_arxiv_file(entry)
62
60
  file = entry.file
63
61
 
64
- key = arxiv_id(entry, :prefix => false, :version => false)
62
+ key = arxiv_id(entry, prefix: false, version: false)
65
63
 
66
64
  if old_entry = @bib[key]
67
65
  # Existing entry found
@@ -71,7 +69,7 @@ module BibSync
71
69
  entry[:arxiv] =~ /v(\d+)$/
72
70
  new_version = $1
73
71
  if old_version && new_version && old_version >= new_version
74
- info('Not updating existing entry with older version', :key => old_entry)
72
+ info('Not updating existing entry with older version', key: old_entry)
75
73
  File.delete(file[:path]) if file
76
74
  return nil
77
75
  end
@@ -79,14 +77,14 @@ module BibSync
79
77
  old_entry[:arxiv] = entry[:arxiv]
80
78
  old_entry[:doi] = entry[:doi]
81
79
  entry = old_entry
82
- info('Updating existing entry', :key => entry)
80
+ info('Updating existing entry', key: entry)
83
81
  else
84
82
  # This is a new entry
85
83
  entry.key = key
86
84
  end
87
85
 
88
86
  if file
89
- new_path = file[:path].sub(arxiv_id(entry, :prefix => false, :version => true), key)
87
+ new_path = file[:path].sub(arxiv_id(entry, prefix: false, version: true), key)
90
88
  File.rename(file[:path], new_path)
91
89
  entry.file = new_path
92
90
  end
@@ -97,8 +95,8 @@ module BibSync
97
95
  end
98
96
 
99
97
  def update_arxiv(entry)
100
- info('Downloading arXiv metadata', :key => entry)
101
- xml = fetch_xml("http://export.arxiv.org/oai2?verb=GetRecord&identifier=oai:arXiv.org:#{arxiv_id(entry, :prefix => true, :version => false)}&metadataPrefix=arXiv")
98
+ info('Downloading arXiv metadata', key: entry)
99
+ xml = fetch_xml("http://export.arxiv.org/oai2?verb=GetRecord&identifier=oai:arXiv.org:#{arxiv_id(entry, prefix: true, version: false)}&metadataPrefix=arXiv")
102
100
  error = xml.xpath('//error').map(&:content).first
103
101
  raise error if error
104
102
 
@@ -108,7 +106,7 @@ module BibSync
108
106
  entry[:author] = xml.xpath('//arXiv/authors/author').map do |author|
109
107
  "{#{author.xpath('keyname').map(&:content).first}}, {#{author.xpath('forenames').map(&:content).first}}"
110
108
  end.join(' and ')
111
- entry[:journal] = ArXivJournal
109
+ entry[:journal] = 'ArXiv e-prints'
112
110
  entry[:eprint] = entry[:arxiv]
113
111
  entry[:archiveprefix] = 'arXiv'
114
112
  date = xml.xpath('//arXiv/updated').map(&:content).first || xml.xpath('//arXiv/created').map(&:content).first
@@ -124,49 +122,8 @@ module BibSync
124
122
  entry[:url] = "http://arxiv.org/abs/#{entry[:arxiv]}"
125
123
  rescue => ex
126
124
  entry.delete(:arxiv)
127
- error('arXiv download failed', :key => entry, :ex => ex)
125
+ error('arXiv download failed', key: entry, ex: ex)
128
126
  end
129
-
130
- def determine_arxiv_and_doi(entry)
131
- if file = entry.file
132
- if file[:type] == :PDF && !entry[:arxiv] && !entry[:doi]
133
- debug('Searching for arXiv or doi identifier in pdf file', :key => entry)
134
- text = `pdftotext -f 1 -l 2 #{Shellwords.escape file[:path]} - 2>/dev/null`
135
- entry[:arxiv] = $1 if text =~ /arXiv:\s*([\w\.\/\-]+)/
136
- entry[:doi] = $1 if text =~ /doi:\s*([\w\.\/\-]+)/i
137
- end
138
-
139
- if !entry[:arxiv] && file[:name] =~ /^(\d+.\d+v\d+)\.\w+$/
140
- debug('Interpreting file name as arXiv identifier', :key => entry)
141
- entry[:arxiv] = $1
142
- end
143
-
144
- if !entry[:doi] && file[:name] =~ /^(PhysRev.*?|RevModPhys.*?)\.\w+$/
145
- debug('Interpreting file name as doi identifier', :key => entry)
146
- entry[:doi] = "10.1103/#{$1}"
147
- end
148
- end
149
-
150
- if !entry[:arxiv] && entry[:doi]
151
- begin
152
- info('Fetch missing arXiv identifier', :key => entry)
153
- xml = fetch_xml("http://export.arxiv.org/api/query?search_query=doi:#{entry[:doi]}&max_results=1")
154
- if xml.xpath('//entry/doi').map(&:content).first == entry[:doi]
155
- id = xml.xpath('//entry/id').map(&:content).first
156
- if id =~ %r{\Ahttp://arxiv.org/abs/(.+)\Z}
157
- entry[:arxiv] = $1
158
- end
159
- end
160
- rescue => ex
161
- error('arXiv doi query failed', :ex => ex, :key => entry)
162
- end
163
- end
164
-
165
- unless entry[:arxiv] || entry[:doi]
166
- warning('No arXiv or doi identifier found', :key => entry)
167
- end
168
- end
169
-
170
127
  end
171
128
  end
172
129
  end
@@ -10,7 +10,7 @@ module BibSync
10
10
 
11
11
  def run
12
12
  notice 'Check validity'
13
- titles, arxivs = {}, {}
13
+ titles, arxivs, dois = {}, {}, {}
14
14
 
15
15
  @bib.each do |entry|
16
16
  next if entry.comment?
@@ -18,14 +18,16 @@ module BibSync
18
18
  w = []
19
19
 
20
20
  file = entry.file
21
- w << 'Missing file' unless file && File.file?(file[:path])
22
21
 
23
- w += [:title, :author, :year, :abstract].reject {|k| entry[k] }.map {|k| "Missing #{k}" }
22
+ missing = []
23
+ missing << :file unless file && File.file?(file[:path])
24
+ missing += [:title, :author, :year, :abstract].reject {|k| entry[k] }
25
+ w << "Missing #{missing.map(&:to_s).sort.join(', ')}" unless missing.empty?
24
26
 
25
- w << 'Invalid file' if split_filename(file[:name]).first != entry.key if file
27
+ w << 'File name does not match entry key' if name_without_ext(file[:name]) != entry.key if file
26
28
 
27
29
  if entry[:arxiv]
28
- id = arxiv_id(entry, :version => false, :prefix => true)
30
+ id = arxiv_id(entry, version: false, prefix: true)
29
31
  if arxivs.include?(id)
30
32
  w << "ArXiv duplicate of '#{arxivs[id]}'"
31
33
  else
@@ -33,6 +35,14 @@ module BibSync
33
35
  end
34
36
  end
35
37
 
38
+ if id = entry[:doi]
39
+ if dois.include?(id)
40
+ w << "DOI duplicate of '#{dois[id]}'"
41
+ else
42
+ dois[id] = entry.key
43
+ end
44
+ end
45
+
36
46
  if entry[:title]
37
47
  if titles.include?(entry[:title])
38
48
  w << "Title duplicate of '#{titles[entry[:title]]}'"
@@ -41,7 +51,7 @@ module BibSync
41
51
  end
42
52
  end
43
53
 
44
- warning(w.join(', '), :key => entry) unless w.empty?
54
+ warning(w.join('; '), key: entry) unless w.empty?
45
55
  end
46
56
  end
47
57
  end
@@ -1,7 +1 @@
1
- require 'bibsync/actions/check_versions'
2
- require 'bibsync/actions/synchronize_files'
3
- require 'bibsync/actions/synchronize_metadata'
4
- require 'bibsync/actions/validate'
5
- require 'bibsync/actions/jabref_format'
6
- require 'bibsync/actions/fetch_from_arxiv'
7
- require 'bibsync/actions/find_my_citations'
1
+ Dir[File.join(File.dirname(__FILE__), 'actions', '*.rb')].each {|f| require f }