RubyGems - bibsync - Versions diffs - 0.0.5 → 0.0.8 - Mend

bibsync 0.0.5 → 0.0.8

Files changed (18) hide show

checksums.yaml +4 -4
data/.gitignore +2 -1
data/.travis.yml +3 -1
data/README.md +17 -1
data/bibsync.gemspec +2 -2
data/lib/bibsync.rb +1 -1
data/lib/bibsync/actions/check_arxiv_versions.rb +2 -2
data/lib/bibsync/actions/determine_arxiv_doi.rb +3 -4
data/lib/bibsync/actions/fetch_from_arxiv.rb +3 -3
data/lib/bibsync/actions/find_my_citations.rb +0 -2
data/lib/bibsync/actions/synchronize_files.rb +0 -2
data/lib/bibsync/actions/synchronize_metadata.rb +42 -25
data/lib/bibsync/bibliography.rb +7 -2
data/lib/bibsync/command.rb +4 -3
data/lib/bibsync/utils.rb +1 -7
data/lib/bibsync/version.rb +1 -1
data/test/test_utils.rb +1 -7
metadata +5 -18

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 656b20418877066a48f7e30f3f702b47e71c7a5d
-  data.tar.gz: af9f5d4f088255dccc86bd12655a009830050ac3
+  metadata.gz: 5578a98ae327ca6b8ee4b5fb0da4b2bbd1786116
+  data.tar.gz: eb56b9ab2ce3d9f1b6599da6a1b4ca0c76db220e
 SHA512:
-  metadata.gz: 55e6153fa4ffa968cbf1f08dba256c2a0c3b427f6a84babc16d99e62bb519e2450335191dd6ce0c4bc8eac1a830d1ba27999215a517b35feb24f4eda2ed92cd5
-  data.tar.gz: 4f9a2308012649be07f5bf3c8549c1a9ae9a1c0410e8c1f33de47e7991dcd1f067e83ea3e96a5c67e518a7b67980ad358bfa24f641a9f6f12740bff76da35449
+  metadata.gz: 3050a0ec740223617d6f700bca7725443e3ab2b1a1cce7ef46c6091ed777d18b3bdc3f6f9ad17abcce3b40f923959069960d977128c112cfbe84e933a3bcfbcd
+  data.tar.gz: d4692dcd5ce86c2cfaf6698a7c77ca5450487067077ba87804325b54cf77a2e7654ae025914819476be79f649e56d21e1c5e056402e46dc02a5d8a3113ed0a57

data/.gitignore CHANGED

@@ -1,6 +1,7 @@
 *.swp
 *.gem
+.#*
 Gemfile.lock
 .bundle
 .yardoc
+test/tmp

data/.travis.yml CHANGED

@@ -8,4 +8,6 @@ rvm:
 before_install:
   - sudo apt-get update -qq
   - sudo apt-get install -qq poppler-utils jabref
+matrix:
+  allow_failures:
+    - rvm: ruby-head

data/README.md CHANGED

@@ -27,11 +27,21 @@ BibSync supports the following features:
 * Downloading of new versions of [arXiv](http://arxiv.org/) papers
 * Simple validation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) files (Checks for missing fields etc)
 * Simple transformation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) fields (Normalization of author, year and journal field...)
-* Works under every platform supporting Ruby (Linux, Windows, ...)
+* Works under every platform supporting Ruby and `pdftotext` (Linux, Windows, ...)
 Quick start
 -----------
+First, make sure that the `pdftotext` program is available on your `$PATH`. On Debian, you can install
+the package that provides it using `apt-get` as follows:
+~~~
+$ apt-get install poppler-utils
+$ pdftotext
+pdftotext version 0.24.1
+...
+~~~
 BibSync requires Ruby >= 1.9.2 to run. It is distributed as a RubyGems package. You can install it via
 the command line
@@ -39,6 +49,12 @@ the command line
 $ gem install bibsync
 ~~~
+To update it later, run
+~~~
+$ gem update bibsync
+~~~
 After that you can use the 'bibsync' tool on the command line. At first let's validate
 a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file called 'thesis.bib'.

data/bibsync.gemspec CHANGED

@@ -9,15 +9,15 @@ Gem::Specification.new do |s|
   s.authors           = ['Daniel Mendler']
   s.email             = ['mail@daniel-mendler.de']
   s.summary           = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
-  s.description       = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
+  s.description       = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files. It automatically downloads the metadata from dx.doi.org and arxiv.org.'
   s.homepage          = 'https://github.com/minad/bibsync'
   s.rubyforge_project = s.name
+  s.license           = 'MIT'
   s.files         = `git ls-files`.split("\n")
   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
   s.require_paths = %w(lib)
-  s.add_runtime_dependency('nokogiri')
   s.add_runtime_dependency('faraday')
   s.add_runtime_dependency('faraday_middleware')
   s.add_development_dependency('rake')

data/lib/bibsync.rb CHANGED

@@ -1,10 +1,10 @@
-require 'nokogiri'
 require 'faraday'
 require 'faraday_middleware'
 require 'shellwords'
 require 'date'
 require 'pathname'
 require 'forwardable'
+require 'rexml/document'
 require 'bibsync/version'
 require 'bibsync/utils'
 require 'bibsync/log'

data/lib/bibsync/actions/check_arxiv_versions.rb CHANGED

@@ -17,8 +17,8 @@ module BibSync
         @bib.select {|e| e[:arxiv] }.each_slice(SliceSize) do |entry|
           begin
             xml = fetch_xml('http://export.arxiv.org/api/query', id_list: entry.map{|e| arxiv_id(e, version: false, prefix: true) }.join(','), max_results: SliceSize)
-            xml.xpath('//entry/id').map(&:content).each_with_index do |id, i|
-              id.gsub!('http://arxiv.org/abs/', '')
+            xml.get_elements('//entry').each_with_index do |e, i|
+              id = e.elements['id'].text.gsub('http://arxiv.org/abs/', '')
               if id != entry[i][:arxiv]
                 info("#{entry[i][:arxiv]} replaced by http://arxiv.org/pdf/#{id}", key: entry[i])
                 arxiv_download(@dir, id) if @update

data/lib/bibsync/actions/determine_arxiv_doi.rb CHANGED

@@ -18,8 +18,6 @@ module BibSync
                   (!@force && entry[:title] && entry[:author] && entry[:year])
           determine_arxiv_and_doi(entry)
-          @bib.save
         end
       end
@@ -49,8 +47,9 @@ module BibSync
           begin
             info('Fetch missing arXiv identifier', key: entry)
             xml = fetch_xml('http://export.arxiv.org/api/query', search_query: "doi:#{entry[:doi]}", max_results: 1)
-            if xml.xpath('//entry/doi').map(&:content).first == entry[:doi]
-              id = xml.xpath('//entry/id').map(&:content).first
+            doi = xml.elements['//arxiv:doi']
+            if doi && doi.text == entry[:doi]
+              id = xml.elements['//entry/id'].text
               if id =~ %r{\Ahttp://arxiv.org/abs/(.+)\Z}
                 entry[:arxiv] = $1
               end

data/lib/bibsync/actions/fetch_from_arxiv.rb CHANGED

@@ -18,7 +18,7 @@ module BibSync
         @fetch.each do |url|
           if url =~ /\A(\d+\.\d+)(v\d+)?\Z/
             arxivs << $1
-          elsif url =~ %r{\Ahttp://arxiv.org/abs/(\d+\.\d+)\Z}
+          elsif url =~ %r{\Ahttp://arxiv.org/abs/(\d+\.\d+)(v\d+)?\Z}
             arxivs << $1
           else
             urls << url
@@ -38,8 +38,8 @@ module BibSync
           arxivs.each_slice(SliceSize) do |ids|
             begin
               xml = fetch_xml('http://export.arxiv.org/api/query', id_list: ids.join(','), max_results: SliceSize)
-              xml.xpath('//entry/id').map(&:content).each_with_index do |id, i|
-                id.gsub!('http://arxiv.org/abs/', '')
+              xml.each_element('//entry/id') do |id|
+                id = id.text.gsub('http://arxiv.org/abs/', '')
                 info 'arXiv download', key: id
                 arxiv_download(@dir, id)
               end

data/lib/bibsync/actions/find_my_citations.rb CHANGED

@@ -38,8 +38,6 @@ module BibSync
             warning("Cited in #{files} but not found in #{@bib.file}", key: key)
           end
         end
-        @bib.save
       end
     end
   end

data/lib/bibsync/actions/synchronize_files.rb CHANGED

@@ -32,8 +32,6 @@ module BibSync
           entry.type ||= :ARTICLE
           entry.file = file
         end
-        @bib.save
       end
     end
   end

data/lib/bibsync/actions/synchronize_metadata.rb CHANGED

@@ -15,6 +15,8 @@ module BibSync
         @bib.to_a.each do |entry|
           next if entry.comment?
+          entry.delete(:abstract) if @force
           if @force || !(entry[:title] && entry[:author] && entry[:year])
             if entry[:arxiv]
               if entry.key == arxiv_id(entry, prefix: false, version: true)
@@ -27,11 +29,12 @@ module BibSync
             update_doi(entry) if entry[:doi]
           end
-          if entry[:doi] =~ /\A10\.1103\// && (@force || !entry[:abstract])
+          if entry[:doi] =~ /\A10\.1103\// && !entry[:abstract]
             update_aps_abstract(entry)
           end
-          @bib.save
+          # Add timestamp when this entry was added
+          entry[:added] ||= Date.today.to_s
         end
       end
@@ -39,20 +42,34 @@ module BibSync
       def update_aps_abstract(entry)
         info("Downloading APS abstract", key: entry)
-        html = fetch_html("http://link.aps.org/doi/#{entry[:doi]}")
-        entry[:abstract] = html.css('.aps-abstractbox').map(&:content).first
+        html = fetch("http://link.aps.org/doi/#{entry[:doi]}")
+        if html =~ %r{<div class='aps-abstractbox'>(.*?)</div>}
+          entry[:abstract] = $1.gsub(/<[^>]+>/, '')
+        end
       rescue => ex
         error('Abstract download failed', key: entry, ex: ex)
       end
       def update_doi(entry)
-        info('Downloading DOI metadata', key: entry)
-        text = fetch("http://dx.doi.org/#{entry[:doi]}", nil, 'Accept' => 'text/bibliography; style=bibtex')
+        url = "http://dx.doi.org/#{entry[:doi]}"
+        info("Downloading DOI metadata from #{url}", key: entry)
+        text = fetch(url, nil, 'Accept' => 'text/bibliography; style=bibtex')
         raise text if text == 'Unknown DOI'
         Entry.parse(text).each {|k, v| entry[k] = v }
       rescue => ex
-        entry.delete(:doi)
         error('DOI download failed', key: entry, ex: ex)
+        # dx.doi.org shows spurious 500 errors
+        if ex.respond_to?(:response) && ex.response[:status] == 500
+          tries ||= 0
+          tries += 1
+          if tries < 10
+            info('Retrying...', key: entry)
+            retry
+          else
+            error('Giving up :(', key: entry)
+          end
+        end
+        entry.delete(:doi)
       end
       # Rename arxiv file if key contains version
@@ -89,36 +106,36 @@ module BibSync
           entry.file = new_path
         end
-        @bib.save
         entry
       end
       def update_arxiv(entry)
         info('Downloading arXiv metadata', key: entry)
         xml = fetch_xml('http://export.arxiv.org/oai2', verb: 'GetRecord', identifier: "oai:arXiv.org:#{arxiv_id(entry, prefix: true, version: false)}", metadataPrefix: 'arXiv')
-        error = xml.xpath('//error').map(&:content).first
-        raise error if error
-        entry[:title] = xml.xpath('//arXiv/title').map(&:content).first
-        entry[:abstract] = xml.xpath('//arXiv/abstract').map(&:content).first
-        entry[:primaryclass] = xml.xpath('//arXiv/categories').map(&:content).first.split(/\s+/).first
-        entry[:author] = xml.xpath('//arXiv/authors/author').map do |author|
-          "{#{author.xpath('keyname').map(&:content).first}}, {#{author.xpath('forenames').map(&:content).first}}"
+        error = xml.elements['//error']
+        raise error.text if error
+        arXiv = xml.elements['//arXiv']
+        entry[:title] = arXiv.elements['title'].text
+        entry[:abstract] = arXiv.elements['abstract'].text
+        entry[:arxivcategories] = arXiv.elements['categories'].text
+        entry[:primaryclass] = entry[:arxivcategories].split(/\s+/).first
+        entry[:author] = arXiv.get_elements('authors/author').map do |author|
+          "{#{author.elements['keyname'].text}}, {#{author.elements['forenames'].text}}"
         end.join(' and ')
         entry[:journal] = 'ArXiv e-prints'
         entry[:eprint] = entry[:arxiv]
         entry[:archiveprefix] = 'arXiv'
-        date = xml.xpath('//arXiv/updated').map(&:content).first || xml.xpath('//arXiv/created').map(&:content).first
-        date = Date.parse(date)
+        entry[:arxivcreated] = arXiv.elements['created'].text if arXiv.elements['created']
+        entry[:arxivupdated] = arXiv.elements['updated'].text if arXiv.elements['updated']
+        date = Date.parse(entry[:arxivupdated] || entry[:arxivcreated])
         entry[:year] = date.year
         entry[:month] = Literal.new(%w(jan feb mar apr may jun jul aug sep oct nov dec)[date.month - 1])
-        doi = xml.xpath('//arXiv/doi').map(&:content).first
-        entry[:doi] = doi if doi
-        journal = xml.xpath('//arXiv/journal-ref').map(&:content).first
-        entry[:journal] = journal if journal
-        comments = xml.xpath('//arXiv/comments').map(&:content).first
-        entry[:comments] = comments if comments
+        entry[:doi] = arXiv.elements['doi'].text if arXiv.elements['doi']
+        entry[:journal] = arXiv.elements['journal-ref'].text if arXiv.elements['journal-ref']
+        entry[:comments] = arXiv.elements['comments'].text if arXiv.elements['comments']
         entry[:url] = "http://arxiv.org/abs/#{entry[:arxiv]}"
       rescue => ex
         entry.delete(:arxiv)

data/lib/bibsync/bibliography.rb CHANGED

@@ -55,8 +55,13 @@ module BibSync
       raise 'No filename given' unless @file
       if @dirty
         @save_hook.call(self) if @save_hook
-        File.open("#{@file}.tmp", 'w') {|f| f.write(self) }
-        File.rename("#{@file}.tmp", @file)
+        tmpfile = "#{@file}.tmp"
+        begin
+          File.open(tmpfile, 'w') {|f| f.write(self) }
+          File.rename(tmpfile, @file)
+        ensure
+          File.unlink(tmpfile) rescue nil
+        end
         @dirty = false
         true
       else

data/lib/bibsync/command.rb CHANGED

@@ -83,14 +83,15 @@ module BibSync
     def process
       if @args.size != 0
-        error 'Too many arguments'
+        puts 'Too many arguments'
         puts @opts
         exit
       end
       if @options[:bib]
-        @options[:bib] = Bibliography.new(@options[:bib])
-        @options[:bib].save_hook = Transformer.new
+        bib = @options[:bib] = Bibliography.new(@options[:bib])
+        bib.save_hook = Transformer.new
+        at_exit { bib.save }
       end
       actions = []

data/lib/bibsync/utils.rb CHANGED

@@ -27,13 +27,7 @@ module BibSync
     end
     def fetch_xml(url, params = nil, headers = nil)
-      xml = Nokogiri::XML(fetch(url, params, headers))
-      xml.remove_namespaces!
-      xml
-    end
-    def fetch_html(url, params = nil, headers = nil)
-      Nokogiri::HTML(fetch(url, params, headers))
+      REXML::Document.new(fetch(url, params, headers)).root
     end
     def arxiv_id(arxiv, opts = {})

data/lib/bibsync/version.rb CHANGED

@@ -1,3 +1,3 @@
 module BibSync
-  VERSION = '0.0.5'
+  VERSION = '0.0.8'
 end

data/test/test_utils.rb CHANGED

@@ -24,13 +24,7 @@ describe BibSync::Utils do
   describe '#fetch_xml' do
     it 'fetches xml' do
-      fetch_xml('http://export.arxiv.org/oai2', verb: 'GetRecord', identifier: 'oai:arXiv.org:1208.2881', metadataPrefix: 'arXiv').must_be_instance_of Nokogiri::XML::Document
-    end
-  end
-  describe '#fetch_html' do
-    it 'fetches html' do
-      fetch_html('http://google.com').must_be_instance_of Nokogiri::HTML::Document
+      fetch_xml('http://export.arxiv.org/oai2', verb: 'GetRecord', identifier: 'oai:arXiv.org:1208.2881', metadataPrefix: 'arXiv').must_be_instance_of REXML::Element
     end
   end

metadata CHANGED

@@ -1,29 +1,15 @@
 --- !ruby/object:Gem::Specification
 name: bibsync
 version: !ruby/object:Gem::Version
-  version: 0.0.5
+  version: 0.0.8
 platform: ruby
 authors:
 - Daniel Mendler
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-07-04 00:00:00.000000000 Z
+date: 2013-10-08 00:00:00.000000000 Z
 dependencies:
-- !ruby/object:Gem::Dependency
-  name: nokogiri
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - '>='
-      - !ruby/object:Gem::Version
-        version: '0'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - '>='
-      - !ruby/object:Gem::Version
-        version: '0'
 - !ruby/object:Gem::Dependency
   name: faraday
   requirement: !ruby/object:Gem::Requirement
@@ -81,7 +67,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 description: BibSync is a tool to synchronize scientific papers and BibTeX bibliography
-  files
+  files. It automatically downloads the metadata from dx.doi.org and arxiv.org.
 email:
 - mail@daniel-mendler.de
 executables:
@@ -135,7 +121,8 @@ files:
 - test/test_entry.rb
 - test/test_utils.rb
 homepage: https://github.com/minad/bibsync
-licenses: []
+licenses:
+- MIT
 metadata: {}
 post_install_message:
 rdoc_options: []