bibsync 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +7 -0
  2. data/.travis.yml +10 -0
  3. data/Gemfile +3 -0
  4. data/LICENSE +21 -0
  5. data/README.md +88 -0
  6. data/Rakefile +16 -0
  7. data/bibsync.gemspec +4 -2
  8. data/lib/bibsync/actions/{check_versions.rb → check_arxiv_versions.rb} +6 -6
  9. data/lib/bibsync/actions/determine_arxiv_doi.rb +70 -0
  10. data/lib/bibsync/actions/fetch_from_arxiv.rb +11 -9
  11. data/lib/bibsync/actions/find_my_citations.rb +4 -4
  12. data/lib/bibsync/actions/jabref_format.rb +2 -2
  13. data/lib/bibsync/actions/synchronize_files.rb +5 -6
  14. data/lib/bibsync/actions/synchronize_metadata.rb +14 -57
  15. data/lib/bibsync/actions/validate.rb +16 -6
  16. data/lib/bibsync/actions.rb +1 -7
  17. data/lib/bibsync/bibliography.rb +60 -23
  18. data/lib/bibsync/command.rb +13 -8
  19. data/lib/bibsync/log.rb +22 -20
  20. data/lib/bibsync/transformer.rb +1 -1
  21. data/lib/bibsync/utils.rb +7 -9
  22. data/lib/bibsync/version.rb +1 -1
  23. data/test/actions/test_check_arxiv_versions.rb +4 -0
  24. data/test/actions/test_determine_arxiv_doi.rb +61 -0
  25. data/test/actions/test_fetch_from_arxiv.rb +4 -0
  26. data/test/actions/test_find_my_citations.rb +4 -0
  27. data/test/actions/test_jabref_format.rb +4 -0
  28. data/test/actions/test_synchronize_files.rb +4 -0
  29. data/test/actions/test_synchronize_metadata.rb +34 -0
  30. data/test/actions/test_validate.rb +4 -0
  31. data/test/fixture/FileWithEmbeddedArXiv.pdf +0 -0
  32. data/test/fixture/FileWithEmbeddedArXiv.tex +7 -0
  33. data/test/fixture/FileWithEmbeddedDOI.pdf +0 -0
  34. data/test/fixture/FileWithEmbeddedDOI.tex +7 -0
  35. data/test/fixture/entry.bib +8 -0
  36. data/test/fixture/test.bib +34 -0
  37. data/test/helper.rb +21 -0
  38. data/test/test_bibliography.rb +222 -0
  39. data/test/test_utils.rb +54 -0
  40. metadata +63 -16
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 21c0564e45a66b0339bf5b7bafaef25633adcd8e
4
+ data.tar.gz: 34a9ae41d395ba912e95accbb0c2cb87c8de69c3
5
+ SHA512:
6
+ metadata.gz: 46e87958376899e94b3bec951241bb02e3085f2e5141146477d9f8bb0de533e27c0c8d07b4c4e5a314f2d734cd386ee092362d38307e18a4d49303a746ad42c0
7
+ data.tar.gz: 46b15a8cf96461ce3fa3e45aef1585e6806177be668a20410ca940687c5d1f5667d30b1e76701b37503984c0a5ca0aa23025728b0154ee306c868ba8bcdaf1db
data/.travis.yml ADDED
@@ -0,0 +1,10 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.3
4
+ - 2.0.0
5
+ - ruby-head
6
+ - jruby-19mode
7
+ - rbx-19mode
8
+ before_install:
9
+ - sudo apt-get install -qq poppler-utils
10
+
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
3
+
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License
2
+
3
+ Copyright (c) 2013 Daniel Mendler
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,88 @@
1
+ BibSync
2
+ =======
3
+
4
+ BibSync is a tool to synchronize your paper database with a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file. It might be most
5
+ useful for physicists and mathematicians, since it supports synchronization with [DOI](http://dx.doi.org/) and [arXiv](http://arxiv.org/).
6
+
7
+ I created this tool while working on my diploma thesis in physics, since I was unhappy
8
+ with existing tools like [Mendeley](http://www.mendeley.com/). I use this tool together with Git for version control
9
+ and [JabRef](http://jabref.sourceforge.net/) for browsing. This tool adheres more to the Unix philosophy that a small tool
10
+ for each task is better than one thing which tries to solve everything. If you use [JabRef](http://jabref.sourceforge.net/)
11
+ for browsing and tagging it is unnecessary to sort the papers into different sub directories by hand.
12
+ Just throw them all in one directory!
13
+
14
+ __Note__: This tool is derived from a script which I used during my thesis. It worked
15
+ quite well and reliably during that time. But be aware that I used Git for version control
16
+ of the [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file. So any mistakes which might be made by this tool could be reverted.
17
+
18
+ Features
19
+ --------
20
+
21
+ BibSync supports the following features:
22
+
23
+ * Synchronization between a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file and a directory containing the papers in pdf, ps or djvu format
24
+ * [JabRef](http://jabref.sourceforge.net/) file fields are generated, so you can open the existing papers directly out of [JabRef](http://jabref.sourceforge.net/)
25
+ * Downloading of [arXiv](http://arxiv.org/) or [DOI](http://dx.doi.org/) metadata
26
+ * Extraction of [arXiv](http://arxiv.org/) or [DOI](http://dx.doi.org/) id out of the file using [pdftotext](http://en.wikipedia.org/wiki/Pdftotext)
27
+ * Downloading of new versions of [arXiv](http://arxiv.org/) papers
28
+ * Simple validation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) files (Checks for missing fields etc)
29
+ * Simple transformation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) fields (Normalization of author, year and journal field...)
30
+ * Works on every platform supporting Ruby (Linux, Windows, ...)
31
+
32
+ Quick start
33
+ -----------
34
+
35
+ BibSync requires Ruby >= 1.9.2 to run. It is distributed as a RubyGems package. You can install it via
36
+ the command line
37
+
38
+ ~~~
39
+ $ gem install bibsync
40
+ ~~~
41
+
42
+ After that you can use the 'bibsync' tool on the command line. First, let's validate
43
+ a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file called 'thesis.bib'.
44
+
45
+ ~~~
46
+ $ bibsync -b ~/thesis/thesis.bib
47
+ ~~~
48
+
49
+ Then we want to synchronize all the papers in our paper directory with 'bibsync' and automatically download
50
+ the missing metadata.
51
+
52
+ ~~~
53
+ $ bibsync -d ~/thesis/papers -b ~/thesis/thesis.bib
54
+ ~~~
55
+
56
+ BibSync tries to download the metadata from [arxiv.org](http://arxiv.org) and [dx.doi.org](http://dx.doi.org). If you want to know more about the functions of 'bibsync' take a look at the command line help.
57
+
58
+ ~~~
59
+ $ bibsync --help
60
+ ~~~
61
+
62
+ My setup
63
+ --------
64
+
65
+ * BibSync for synchronizing
66
+ * [JabRef](http://jabref.sourceforge.net/) for browsing the bibliography, tagging and categorizing papers
67
+ * [Biblatex](http://www.ctan.org/pkg/biblatex) to include a bibliography in LaTeX with full Unicode support
68
+
69
+ Alternatives
70
+ ------------
71
+
72
+ * [Mendeley](http://www.mendeley.com/) (Commercial, synchronizes with their server, limited disk space, bloated GUI application)
73
+ * [Zotero](http://www.zotero.org/) (Firefox plugin, Open source)
74
+
75
+ A better name?
76
+ --------------
77
+
78
+ If you have a suggestion for a better name, just let me know...
79
+
80
+ Author
81
+ ------
82
+
83
+ Daniel Mendler
84
+
85
+ License
86
+ -------
87
+
88
+ See LICENSE
data/Rakefile ADDED
@@ -0,0 +1,16 @@
1
+ begin
2
+ require 'bundler'
3
+ Bundler::GemHelper.install_tasks
4
+ rescue Exception
5
+ end
6
+
7
+ require 'rake/testtask'
8
+
9
+ Rake::TestTask.new :test do |t|
10
+ t.libs << 'lib' << 'test'
11
+ t.test_files = FileList['test/**/test_*.rb']
12
+ t.verbose = true
13
+ t.ruby_opts << '-w' << '-v'
14
+ end
15
+
16
+ task :default => :test
data/bibsync.gemspec CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |s|
8
8
  s.date = Date.today.to_s
9
9
  s.authors = ['Daniel Mendler']
10
10
  s.email = ['mail@daniel-mendler.de']
11
- s.summary = 'BibSync is a tool to synchronize scientific papers and bibtex bibliography files'
12
- s.description = 'BibSync is a tool to synchronize scientific papers and bibtex bibliography files'
11
+ s.summary = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
12
+ s.description = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
13
13
  s.homepage = 'https://github.com/minad/bibsync'
14
14
  s.rubyforge_project = s.name
15
15
 
@@ -18,4 +18,6 @@ Gem::Specification.new do |s|
18
18
  s.require_paths = %w(lib)
19
19
 
20
20
  s.add_runtime_dependency('nokogiri')
21
+ s.add_development_dependency('rake')
22
+ s.add_development_dependency('minitest')
21
23
  end
@@ -1,14 +1,14 @@
1
1
  module BibSync
2
2
  module Actions
3
- class CheckVersions
3
+ class CheckArXivVersions
4
4
  include Log
5
5
  include Utils
6
6
 
7
7
  SliceSize = 20
8
8
 
9
9
  def initialize(options)
10
- raise 'Bibliography must be set' unless @bib = options[:bib]
11
- raise 'Directory must be set' unless @dir = options[:dir]
10
+ raise 'Option :bib is required' unless @bib = options[:bib]
11
+ raise 'Option :dir is required' unless @dir = options[:dir]
12
12
  @update = options[:update]
13
13
  end
14
14
 
@@ -16,16 +16,16 @@ module BibSync
16
16
  notice 'Check for newer version on arXiv'
17
17
  @bib.select {|e| e[:arxiv] }.each_slice(SliceSize) do |entry|
18
18
  begin
19
- xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{entry.map{|e| arxiv_id(e, :version => false, :prefix => true) }.join(',')}&max_results=#{SliceSize}")
19
+ xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{entry.map{|e| arxiv_id(e, version: false, prefix: true) }.join(',')}&max_results=#{SliceSize}")
20
20
  xml.xpath('//entry/id').map(&:content).each_with_index do |id, i|
21
21
  id.gsub!('http://arxiv.org/abs/', '')
22
22
  if id != entry[i][:arxiv]
23
- info("#{entry[i][:arxiv]} replaced by http://arxiv.org/pdf/#{id}", :key => entry[i])
23
+ info("#{entry[i][:arxiv]} replaced by http://arxiv.org/pdf/#{id}", key: entry[i])
24
24
  arxiv_download(@dir, id) if @update
25
25
  end
26
26
  end
27
27
  rescue => ex
28
- error('arXiv query failed', :ex => ex)
28
+ error('arXiv query failed', ex: ex)
29
29
  end
30
30
  end
31
31
 
@@ -0,0 +1,70 @@
1
+ module BibSync
2
+ module Actions
3
+ class DetermineArXivDOI
4
+ include Utils
5
+ include Log
6
+
7
+ def initialize(options)
8
+ raise 'Option :bib is required' unless @bib = options[:bib]
9
+ @force = options[:resync]
10
+ end
11
+
12
+ def run
13
+ notice 'Determine arXiv and DOI identifiers'
14
+
15
+ @bib.each do |entry|
16
+ next if entry.comment? ||
17
+ (entry[:doi] && entry[:arxiv]) ||
18
+ (!@force && entry[:title] && entry[:author] && entry[:year])
19
+
20
+ determine_arxiv_and_doi(entry)
21
+
22
+ @bib.save
23
+ end
24
+ end
25
+
26
+ private
27
+
28
+ def determine_arxiv_and_doi(entry)
29
+ if file = entry.file
30
+ if file[:type] == :PDF && !entry[:arxiv] && !entry[:doi]
31
+ debug('Searching for arXiv or doi identifier in pdf file', key: entry)
32
+ text = `pdftotext -f 1 -l 2 #{Shellwords.escape file[:path]} - 2>/dev/null`
33
+ entry[:arxiv] = $1 if text =~ /arXiv:\s*([\w\.\/\-]+)/
34
+ entry[:doi] = $1 if text =~ /doi:\s*([\w\.\/\-]+)/i
35
+ end
36
+
37
+ if !entry[:arxiv] && file[:name] =~ /^(\d+.\d+v\d+)\.\w+$/
38
+ debug('Interpreting file name as arXiv identifier', key: entry)
39
+ entry[:arxiv] = $1
40
+ end
41
+
42
+ if !entry[:doi] && file[:name] =~ /^(PhysRev.*?|RevModPhys.*?)\.\w+$/
43
+ debug('Interpreting file name as doi identifier', key: entry)
44
+ entry[:doi] = "10.1103/#{$1}"
45
+ end
46
+ end
47
+
48
+ if !entry[:arxiv] && entry[:doi]
49
+ begin
50
+ info('Fetch missing arXiv identifier', key: entry)
51
+ xml = fetch_xml("http://export.arxiv.org/api/query?search_query=doi:#{entry[:doi]}&max_results=1")
52
+ if xml.xpath('//entry/doi').map(&:content).first == entry[:doi]
53
+ id = xml.xpath('//entry/id').map(&:content).first
54
+ if id =~ %r{\Ahttp://arxiv.org/abs/(.+)\Z}
55
+ entry[:arxiv] = $1
56
+ end
57
+ end
58
+ rescue => ex
59
+ error('arXiv query by DOI failed', ex: ex, key: entry)
60
+ end
61
+ end
62
+
63
+ unless entry[:arxiv] || entry[:doi]
64
+ warning('No arXiv or DOI identifier found', key: entry)
65
+ end
66
+ end
67
+
68
+ end
69
+ end
70
+ end
@@ -7,17 +7,19 @@ module BibSync
7
7
  include Utils
8
8
 
9
9
  def initialize(options)
10
- raise 'Fetch must be set' unless @fetch = options[:fetch]
11
- raise 'Directory must be set' unless @dir = options[:dir]
10
+ raise 'Option :fetch is required' unless @fetch = options[:fetch]
11
+ raise 'Option :dir is required' unless @dir = options[:dir]
12
12
  end
13
13
 
14
14
  def run
15
- ids = []
15
+ arxivs = []
16
16
  urls = []
17
17
 
18
18
  @fetch.each do |url|
19
- if url =~ %r{^http://arxiv.org/abs/(\d+\.\d+)$}
20
- ids << $1
19
+ if url =~ /\A(\d+\.\d+)(v\d+)?\Z/
20
+ arxivs << $1
21
+ elsif url =~ %r{\Ahttp://arxiv.org/abs/(\d+\.\d+)\Z}
22
+ arxivs << $1
21
23
  else
22
24
  urls << url
23
25
  end
@@ -31,18 +33,18 @@ module BibSync
31
33
  end
32
34
  end
33
35
 
34
- unless ids.empty?
36
+ unless arxivs.empty?
35
37
  notice 'Downloading from arXiv'
36
- ids.each_slice(SliceSize) do |ids|
38
+ arxivs.each_slice(SliceSize) do |ids|
37
39
  begin
38
40
  xml = fetch_xml("http://export.arxiv.org/api/query?id_list=#{ids.join(',')}&max_results=#{SliceSize}")
39
41
  xml.xpath('//entry/id').map(&:content).each_with_index do |id, i|
40
42
  id.gsub!('http://arxiv.org/abs/', '')
41
- info 'arXiv download', :key => id
43
+ info 'arXiv download', key: id
42
44
  arxiv_download(@dir, id)
43
45
  end
44
46
  rescue => ex
45
- error('arXiv query failed', :ex => ex)
47
+ error('arXiv query failed', ex: ex)
46
48
  end
47
49
  end
48
50
  end
@@ -5,8 +5,8 @@ module BibSync
5
5
  include Utils
6
6
 
7
7
  def initialize(options)
8
- raise 'Bibliography must be set' unless @bib = options[:bib]
9
- raise 'Tex directory must be set' unless @dir = options[:citedbyme]
8
+ raise 'Option :bib is required' unless @bib = options[:bib]
9
+ raise 'Option :citedbyme is required' unless @dir = options[:citedbyme]
10
10
  raise "#{@dir} is not a directory" unless File.directory?(@dir)
11
11
  end
12
12
 
@@ -19,7 +19,7 @@ module BibSync
19
19
  $1.split(/\s*,\s*/).each do |key|
20
20
  key.strip!
21
21
  file = @bib.relative_path(file)
22
- debug("Cited in #{file}", :key => key)
22
+ debug("Cited in #{file}", key: key)
23
23
  (cites[key] ||= []) << file
24
24
  end
25
25
  end
@@ -35,7 +35,7 @@ module BibSync
35
35
  if @bib[key]
36
36
  @bib[key][:citedbyme] = files
37
37
  else
38
- warning("Cited in #{files} but not found in #{@bib.file}", :key => key)
38
+ warning("Cited in #{files} but not found in #{@bib.file}", key: key)
39
39
  end
40
40
  end
41
41
 
@@ -1,11 +1,11 @@
1
1
  module BibSync
2
2
  module Actions
3
- class JabrefFormat
3
+ class JabRefFormat
4
4
  include Utils
5
5
  include Log
6
6
 
7
7
  def initialize(options)
8
- raise 'Bibliography must be set' unless @bib = options[:bib]
8
+ raise 'Option :bib is required' unless @bib = options[:bib]
9
9
  end
10
10
 
11
11
  def run
@@ -7,8 +7,8 @@ module BibSync
7
7
  FileTypes = %w(djvu pdf ps)
8
8
 
9
9
  def initialize(options)
10
- raise 'Bibliography must be set' unless @bib = options[:bib]
11
- raise 'Directory must be set' unless @dir = options[:dir]
10
+ raise 'Option :bib is required' unless @bib = options[:bib]
11
+ raise 'Option :dir is required' unless @dir = options[:dir]
12
12
  end
13
13
 
14
14
  def run
@@ -17,16 +17,15 @@ module BibSync
17
17
  files = {}
18
18
  Dir[File.join(@dir, "**/*.{#{FileTypes.join(',')}}")].sort.each do |file|
19
19
  name = File.basename(file)
20
- key, type = split_filename(name)
20
+ key = name_without_ext(name)
21
21
  raise "Duplicate file #{name}" if files[key]
22
22
  files[key] = file
23
23
  end
24
24
 
25
25
  files.each do |key, file|
26
26
  unless entry = @bib[key]
27
- info('New file', :key => key)
28
- entry = Bibliography::Entry.new
29
- entry.key = key
27
+ info('New file', key: key)
28
+ entry = Bibliography::Entry.new(key: key)
30
29
  @bib << entry
31
30
  end
32
31
 
@@ -5,7 +5,7 @@ module BibSync
5
5
  include Log
6
6
 
7
7
  def initialize(options)
8
- raise 'Bibliography must be set' unless @bib = options[:bib]
8
+ raise 'Option :bib is required' unless @bib = options[:bib]
9
9
  @force = options[:resync]
10
10
  end
11
11
 
@@ -16,10 +16,8 @@ module BibSync
16
16
  next if entry.comment?
17
17
 
18
18
  if @force || !(entry[:title] && entry[:author] && entry[:year])
19
- determine_arxiv_and_doi(entry)
20
-
21
19
  if entry[:arxiv]
22
- if entry.key == arxiv_id(entry, :prefix => false, :version => true)
20
+ if entry.key == arxiv_id(entry, prefix: false, version: true)
23
21
  entry = rename_arxiv_file(entry)
24
22
  next unless entry
25
23
  end
@@ -40,28 +38,28 @@ module BibSync
40
38
  private
41
39
 
42
40
  def update_aps_abstract(entry)
43
- info("Downloading APS abstract", :key => entry)
41
+ info("Downloading APS abstract", key: entry)
44
42
  html = fetch_html("http://link.aps.org/doi/#{entry[:doi]}")
45
43
  entry[:abstract] = html.css('.aps-abstractbox').map(&:content).first
46
44
  rescue => ex
47
- error('Abstract download failed', :key => entry, :ex => ex)
45
+ error('Abstract download failed', key: entry, ex: ex)
48
46
  end
49
47
 
50
48
  def update_doi(entry)
51
- info('Downloading doi.org metadata', :key => entry)
49
+ info('Downloading DOI metadata', key: entry)
52
50
  text = fetch("http://dx.doi.org/#{entry[:doi]}", 'Accept' => 'text/bibliography; style=bibtex')
53
51
  raise text if text == 'Unknown DOI'
54
52
  Bibliography::Entry.parse(text).each {|k, v| entry[k] = v }
55
53
  rescue => ex
56
54
  entry.delete(:doi)
57
- error('doi download failed', :key => entry, :ex => ex)
55
+ error('DOI download failed', key: entry, ex: ex)
58
56
  end
59
57
 
60
58
  # Rename arxiv file if key contains version
61
59
  def rename_arxiv_file(entry)
62
60
  file = entry.file
63
61
 
64
- key = arxiv_id(entry, :prefix => false, :version => false)
62
+ key = arxiv_id(entry, prefix: false, version: false)
65
63
 
66
64
  if old_entry = @bib[key]
67
65
  # Existing entry found
@@ -71,7 +69,7 @@ module BibSync
71
69
  entry[:arxiv] =~ /v(\d+)$/
72
70
  new_version = $1
73
71
  if old_version && new_version && old_version >= new_version
74
- info('Not updating existing entry with older version', :key => old_entry)
72
+ info('Not updating existing entry with older version', key: old_entry)
75
73
  File.delete(file[:path]) if file
76
74
  return nil
77
75
  end
@@ -79,14 +77,14 @@ module BibSync
79
77
  old_entry[:arxiv] = entry[:arxiv]
80
78
  old_entry[:doi] = entry[:doi]
81
79
  entry = old_entry
82
- info('Updating existing entry', :key => entry)
80
+ info('Updating existing entry', key: entry)
83
81
  else
84
82
  # This is a new entry
85
83
  entry.key = key
86
84
  end
87
85
 
88
86
  if file
89
- new_path = file[:path].sub(arxiv_id(entry, :prefix => false, :version => true), key)
87
+ new_path = file[:path].sub(arxiv_id(entry, prefix: false, version: true), key)
90
88
  File.rename(file[:path], new_path)
91
89
  entry.file = new_path
92
90
  end
@@ -97,8 +95,8 @@ module BibSync
97
95
  end
98
96
 
99
97
  def update_arxiv(entry)
100
- info('Downloading arXiv metadata', :key => entry)
101
- xml = fetch_xml("http://export.arxiv.org/oai2?verb=GetRecord&identifier=oai:arXiv.org:#{arxiv_id(entry, :prefix => true, :version => false)}&metadataPrefix=arXiv")
98
+ info('Downloading arXiv metadata', key: entry)
99
+ xml = fetch_xml("http://export.arxiv.org/oai2?verb=GetRecord&identifier=oai:arXiv.org:#{arxiv_id(entry, prefix: true, version: false)}&metadataPrefix=arXiv")
102
100
  error = xml.xpath('//error').map(&:content).first
103
101
  raise error if error
104
102
 
@@ -108,7 +106,7 @@ module BibSync
108
106
  entry[:author] = xml.xpath('//arXiv/authors/author').map do |author|
109
107
  "{#{author.xpath('keyname').map(&:content).first}}, {#{author.xpath('forenames').map(&:content).first}}"
110
108
  end.join(' and ')
111
- entry[:journal] = ArXivJournal
109
+ entry[:journal] = 'ArXiv e-prints'
112
110
  entry[:eprint] = entry[:arxiv]
113
111
  entry[:archiveprefix] = 'arXiv'
114
112
  date = xml.xpath('//arXiv/updated').map(&:content).first || xml.xpath('//arXiv/created').map(&:content).first
@@ -124,49 +122,8 @@ module BibSync
124
122
  entry[:url] = "http://arxiv.org/abs/#{entry[:arxiv]}"
125
123
  rescue => ex
126
124
  entry.delete(:arxiv)
127
- error('arXiv download failed', :key => entry, :ex => ex)
125
+ error('arXiv download failed', key: entry, ex: ex)
128
126
  end
129
-
130
- def determine_arxiv_and_doi(entry)
131
- if file = entry.file
132
- if file[:type] == :PDF && !entry[:arxiv] && !entry[:doi]
133
- debug('Searching for arXiv or doi identifier in pdf file', :key => entry)
134
- text = `pdftotext -f 1 -l 2 #{Shellwords.escape file[:path]} - 2>/dev/null`
135
- entry[:arxiv] = $1 if text =~ /arXiv:\s*([\w\.\/\-]+)/
136
- entry[:doi] = $1 if text =~ /doi:\s*([\w\.\/\-]+)/i
137
- end
138
-
139
- if !entry[:arxiv] && file[:name] =~ /^(\d+.\d+v\d+)\.\w+$/
140
- debug('Interpreting file name as arXiv identifier', :key => entry)
141
- entry[:arxiv] = $1
142
- end
143
-
144
- if !entry[:doi] && file[:name] =~ /^(PhysRev.*?|RevModPhys.*?)\.\w+$/
145
- debug('Interpreting file name as doi identifier', :key => entry)
146
- entry[:doi] = "10.1103/#{$1}"
147
- end
148
- end
149
-
150
- if !entry[:arxiv] && entry[:doi]
151
- begin
152
- info('Fetch missing arXiv identifier', :key => entry)
153
- xml = fetch_xml("http://export.arxiv.org/api/query?search_query=doi:#{entry[:doi]}&max_results=1")
154
- if xml.xpath('//entry/doi').map(&:content).first == entry[:doi]
155
- id = xml.xpath('//entry/id').map(&:content).first
156
- if id =~ %r{\Ahttp://arxiv.org/abs/(.+)\Z}
157
- entry[:arxiv] = $1
158
- end
159
- end
160
- rescue => ex
161
- error('arXiv doi query failed', :ex => ex, :key => entry)
162
- end
163
- end
164
-
165
- unless entry[:arxiv] || entry[:doi]
166
- warning('No arXiv or doi identifier found', :key => entry)
167
- end
168
- end
169
-
170
127
  end
171
128
  end
172
129
  end
@@ -10,7 +10,7 @@ module BibSync
10
10
 
11
11
  def run
12
12
  notice 'Check validity'
13
- titles, arxivs = {}, {}
13
+ titles, arxivs, dois = {}, {}, {}
14
14
 
15
15
  @bib.each do |entry|
16
16
  next if entry.comment?
@@ -18,14 +18,16 @@ module BibSync
18
18
  w = []
19
19
 
20
20
  file = entry.file
21
- w << 'Missing file' unless file && File.file?(file[:path])
22
21
 
23
- w += [:title, :author, :year, :abstract].reject {|k| entry[k] }.map {|k| "Missing #{k}" }
22
+ missing = []
23
+ missing << :file unless file && File.file?(file[:path])
24
+ missing += [:title, :author, :year, :abstract].reject {|k| entry[k] }
25
+ w << "Missing #{missing.map(&:to_s).sort.join(', ')}" unless missing.empty?
24
26
 
25
- w << 'Invalid file' if split_filename(file[:name]).first != entry.key if file
27
+ w << 'File name does not match entry key' if name_without_ext(file[:name]) != entry.key if file
26
28
 
27
29
  if entry[:arxiv]
28
- id = arxiv_id(entry, :version => false, :prefix => true)
30
+ id = arxiv_id(entry, version: false, prefix: true)
29
31
  if arxivs.include?(id)
30
32
  w << "ArXiv duplicate of '#{arxivs[id]}'"
31
33
  else
@@ -33,6 +35,14 @@ module BibSync
33
35
  end
34
36
  end
35
37
 
38
+ if id = entry[:doi]
39
+ if dois.include?(id)
40
+ w << "DOI duplicate of '#{dois[id]}'"
41
+ else
42
+ dois[id] = entry.key
43
+ end
44
+ end
45
+
36
46
  if entry[:title]
37
47
  if titles.include?(entry[:title])
38
48
  w << "Title duplicate of '#{titles[entry[:title]]}'"
@@ -41,7 +51,7 @@ module BibSync
41
51
  end
42
52
  end
43
53
 
44
- warning(w.join(', '), :key => entry) unless w.empty?
54
+ warning(w.join('; '), key: entry) unless w.empty?
45
55
  end
46
56
  end
47
57
  end
@@ -1,7 +1 @@
1
- require 'bibsync/actions/check_versions'
2
- require 'bibsync/actions/synchronize_files'
3
- require 'bibsync/actions/synchronize_metadata'
4
- require 'bibsync/actions/validate'
5
- require 'bibsync/actions/jabref_format'
6
- require 'bibsync/actions/fetch_from_arxiv'
7
- require 'bibsync/actions/find_my_citations'
1
+ Dir[File.join(File.dirname(__FILE__), 'actions', '*.rb')].each {|f| require f }