RubyGems - bibsync - Versions diffs - 0.0.5 → 0.0.8 - Mend

bibsync 0.0.5 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

checksums.yaml +4 -4
data/.gitignore +2 -1
data/.travis.yml +3 -1
data/README.md +17 -1
data/bibsync.gemspec +2 -2
data/lib/bibsync.rb +1 -1
data/lib/bibsync/actions/check_arxiv_versions.rb +2 -2
data/lib/bibsync/actions/determine_arxiv_doi.rb +3 -4
data/lib/bibsync/actions/fetch_from_arxiv.rb +3 -3
data/lib/bibsync/actions/find_my_citations.rb +0 -2
data/lib/bibsync/actions/synchronize_files.rb +0 -2
data/lib/bibsync/actions/synchronize_metadata.rb +42 -25
data/lib/bibsync/bibliography.rb +7 -2
data/lib/bibsync/command.rb +4 -3
data/lib/bibsync/utils.rb +1 -7
data/lib/bibsync/version.rb +1 -1
data/test/test_utils.rb +1 -7
metadata +5 -18

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 656b20418877066a48f7e30f3f702b47e71c7a5d
-  data.tar.gz: af9f5d4f088255dccc86bd12655a009830050ac3
+  metadata.gz: 5578a98ae327ca6b8ee4b5fb0da4b2bbd1786116
+  data.tar.gz: eb56b9ab2ce3d9f1b6599da6a1b4ca0c76db220e
 SHA512:
-  metadata.gz: 55e6153fa4ffa968cbf1f08dba256c2a0c3b427f6a84babc16d99e62bb519e2450335191dd6ce0c4bc8eac1a830d1ba27999215a517b35feb24f4eda2ed92cd5
-  data.tar.gz: 4f9a2308012649be07f5bf3c8549c1a9ae9a1c0410e8c1f33de47e7991dcd1f067e83ea3e96a5c67e518a7b67980ad358bfa24f641a9f6f12740bff76da35449
+  metadata.gz: 3050a0ec740223617d6f700bca7725443e3ab2b1a1cce7ef46c6091ed777d18b3bdc3f6f9ad17abcce3b40f923959069960d977128c112cfbe84e933a3bcfbcd
+  data.tar.gz: d4692dcd5ce86c2cfaf6698a7c77ca5450487067077ba87804325b54cf77a2e7654ae025914819476be79f649e56d21e1c5e056402e46dc02a5d8a3113ed0a57

data/.gitignore CHANGED

@@ -1,6 +1,7 @@
 *.swp
 *.gem
+.#*
 Gemfile.lock
 .bundle
 .yardoc
+test/tmp

data/.travis.yml CHANGED

@@ -8,4 +8,6 @@ rvm:
 before_install:
   - sudo apt-get update -qq
   - sudo apt-get install -qq poppler-utils jabref
+matrix:
+  allow_failures:
+    - rvm: ruby-head

data/README.md CHANGED

@@ -27,11 +27,21 @@ BibSync supports the following features:
 * Downloading of new versions of [arXiv](http://arxiv.org/) papers
 * Simple validation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) files (Checks for missing fields etc)
 * Simple transformation of [BibTeX](http://en.wikipedia.org/wiki/BibTeX) fields (Normalization of author, year and journal field...)
-* Works under every platform supporting Ruby (Linux, Windows, ...)
+* Works under every platform supporting Ruby and `pdftotext` (Linux, Windows, ...)
 Quick start
 -----------
+At first you have to ensure that you have the `pdftotext` program available on your `$PATH`. Under Debian you can install
+the package using `apt-get` as follows
+~~~
+$ apt-get install poppler-utils
+$ pdftotext
+pdftotext version 0.24.1
+...
+~~~
 BibSync requires Ruby >= 1.9.2 to run. It is distributed as a RubyGems package. You can install it via
 the command line
@@ -39,6 +49,12 @@ the command line
 $ gem install bibsync
 ~~~
+And for updating, you write
+~~~
+$ gem update bibsync
+~~~
 After that you can use the 'bibsync' tool on the command line. At first let's validate
 a [BibTeX](http://en.wikipedia.org/wiki/BibTeX) file called 'thesis.bib'.

data/bibsync.gemspec CHANGED

@@ -9,15 +9,15 @@ Gem::Specification.new do |s|
   s.authors           = ['Daniel Mendler']
   s.email             = ['mail@daniel-mendler.de']
   s.summary           = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
-  s.description       = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files'
+  s.description       = 'BibSync is a tool to synchronize scientific papers and BibTeX bibliography files. It automatically downloads the metadata from dx.doi.org and arxiv.org.'
   s.homepage          = 'https://github.com/minad/bibsync'
   s.rubyforge_project = s.name
+  s.license           = 'MIT'
   s.files         = `git ls-files`.split("\n")
   s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
   s.require_paths = %w(lib)
-  s.add_runtime_dependency('nokogiri')
   s.add_runtime_dependency('faraday')
   s.add_runtime_dependency('faraday_middleware')
   s.add_development_dependency('rake')

data/lib/bibsync.rb CHANGED

@@ -1,10 +1,10 @@
-require 'nokogiri'
 require 'faraday'
 require 'faraday_middleware'
 require 'shellwords'
 require 'date'
 require 'pathname'
 require 'forwardable'
+require 'rexml/document'
 require 'bibsync/version'
 require 'bibsync/utils'
 require 'bibsync/log'

data/lib/bibsync/actions/check_arxiv_versions.rb CHANGED

@@ -17,8 +17,8 @@ module BibSync
         @bib.select {|e| e[:arxiv] }.each_slice(SliceSize) do |entry|
           begin
             xml = fetch_xml('http://export.arxiv.org/api/query', id_list: entry.map{|e| arxiv_id(e, version: false, prefix: true) }.join(','), max_results: SliceSize)
-            xml.xpath('//entry/id').map(&:content).each_with_index do |id, i|
-              id.gsub!('http://arxiv.org/abs/', '')
+            xml.get_elements('//entry').each_with_index do |e, i|
+              id = e.elements['id'].text.gsub('http://arxiv.org/abs/', '')
               if id != entry[i][:arxiv]
                 info("#{entry[i][:arxiv]} replaced by http://arxiv.org/pdf/#{id}", key: entry[i])
                 arxiv_download(@dir, id) if @update

data/lib/bibsync/actions/determine_arxiv_doi.rb CHANGED

@@ -18,8 +18,6 @@ module BibSync
                   (!@force && entry[:title] && entry[:author] && entry[:year])
           determine_arxiv_and_doi(entry)
-          @bib.save
         end
       end
@@ -49,8 +47,9 @@ module BibSync
           begin
             info('Fetch missing arXiv identifier', key: entry)
             xml = fetch_xml('http://export.arxiv.org/api/query', search_query: "doi:#{entry[:doi]}", max_results: 1)
-            if xml.xpath('//entry/doi').map(&:content).first == entry[:doi]
-              id = xml.xpath('//entry/id').map(&:content).first
+            doi = xml.elements['//arxiv:doi']
+            if doi && doi.text == entry[:doi]
+              id = xml.elements['//entry/id'].text
               if id =~ %r{\Ahttp://arxiv.org/abs/(.+)\Z}
                 entry[:arxiv] = $1
               end

data/lib/bibsync/actions/fetch_from_arxiv.rb CHANGED

@@ -18,7 +18,7 @@ module BibSync
         @fetch.each do |url|
           if url =~ /\A(\d+\.\d+)(v\d+)?\Z/
             arxivs << $1
-          elsif url =~ %r{\Ahttp://arxiv.org/abs/(\d+\.\d+)\Z}
+          elsif url =~ %r{\Ahttp://arxiv.org/abs/(\d+\.\d+)(v\d+)?\Z}
             arxivs << $1
           else
             urls << url
@@ -38,8 +38,8 @@ module BibSync
           arxivs.each_slice(SliceSize) do |ids|
             begin
               xml = fetch_xml('http://export.arxiv.org/api/query', id_list: ids.join(','), max_results: SliceSize)
-              xml.xpath('//entry/id').map(&:content).each_with_index do |id, i|
-                id.gsub!('http://arxiv.org/abs/', '')
+              xml.each_element('//entry/id') do |id|
+                id = id.text.gsub('http://arxiv.org/abs/', '')
                 info 'arXiv download', key: id
                 arxiv_download(@dir, id)
               end

data/lib/bibsync/actions/find_my_citations.rb CHANGED

@@ -38,8 +38,6 @@ module BibSync
             warning("Cited in #{files} but not found in #{@bib.file}", key: key)
           end
         end
-        @bib.save
       end
     end
   end

data/lib/bibsync/actions/synchronize_files.rb CHANGED

@@ -32,8 +32,6 @@ module BibSync
           entry.type ||= :ARTICLE
           entry.file = file
         end
-        @bib.save
       end
     end
   end

data/lib/bibsync/actions/synchronize_metadata.rb CHANGED

@@ -15,6 +15,8 @@ module BibSync
         @bib.to_a.each do |entry|
           next if entry.comment?
+          entry.delete(:abstract) if @force
           if @force || !(entry[:title] && entry[:author] && entry[:year])
             if entry[:arxiv]
               if entry.key == arxiv_id(entry, prefix: false, version: true)
@@ -27,11 +29,12 @@ module BibSync
             update_doi(entry) if entry[:doi]
           end
-          if entry[:doi] =~ /\A10\.1103\// && (@force || !entry[:abstract])
+          if entry[:doi] =~ /\A10\.1103\// && !entry[:abstract]
             update_aps_abstract(entry)
           end
-          @bib.save
+          # Add timestamp when this entry was added
+          entry[:added] ||= Date.today.to_s
         end
       end
@@ -39,20 +42,34 @@ module BibSync
       def update_aps_abstract(entry)
         info("Downloading APS abstract", key: entry)
-        html = fetch_html("http://link.aps.org/doi/#{entry[:doi]}")
-        entry[:abstract] = html.css('.aps-abstractbox').map(&:content).first
+        html = fetch("http://link.aps.org/doi/#{entry[:doi]}")
+        if html =~ %r{<div class='aps-abstractbox'>(.*?)</div>}
+          entry[:abstract] = $1.gsub(/<[^>]+>/, '')
+        end
       rescue => ex
         error('Abstract download failed', key: entry, ex: ex)
       end
       def update_doi(entry)
-        info('Downloading DOI metadata', key: entry)
-        text = fetch("http://dx.doi.org/#{entry[:doi]}", nil, 'Accept' => 'text/bibliography; style=bibtex')
+        url = "http://dx.doi.org/#{entry[:doi]}"
+        info("Downloading DOI metadata from #{url}", key: entry)
+        text = fetch(url, nil, 'Accept' => 'text/bibliography; style=bibtex')
         raise text if text == 'Unknown DOI'
         Entry.parse(text).each {|k, v| entry[k] = v }
       rescue => ex
-        entry.delete(:doi)
         error('DOI download failed', key: entry, ex: ex)
+        # dx.doi.org shows spurious 500 errors
+        if ex.respond_to?(:response) && ex.response[:status] == 500
+          tries ||= 0
+          tries += 1
+          if tries < 10
+            info('Retrying...', key: entry)
+            retry
+          else
+            error('Giving up :(', key: entry)
+          end
+        end
+        entry.delete(:doi)
       end
       # Rename arxiv file if key contains version
@@ -89,36 +106,36 @@ module BibSync
           entry.file = new_path
         end
-        @bib.save
         entry
       end
       def update_arxiv(entry)
         info('Downloading arXiv metadata', key: entry)
         xml = fetch_xml('http://export.arxiv.org/oai2', verb: 'GetRecord', identifier: "oai:arXiv.org:#{arxiv_id(entry, prefix: true, version: false)}", metadataPrefix: 'arXiv')
-        error = xml.xpath('//error').map(&:content).first
-        raise error if error
-        entry[:title] = xml.xpath('//arXiv/title').map(&:content).first
-        entry[:abstract] = xml.xpath('//arXiv/abstract').map(&:content).first
-        entry[:primaryclass] = xml.xpath('//arXiv/categories').map(&:content).first.split(/\s+/).first
-        entry[:author] = xml.xpath('//arXiv/authors/author').map do |author|
-          "{#{author.xpath('keyname').map(&:content).first}}, {#{author.xpath('forenames').map(&:content).first}}"
+        error = xml.elements['//error']
+        raise error.text if error
+        arXiv = xml.elements['//arXiv']
+        entry[:title] = arXiv.elements['title'].text
+        entry[:abstract] = arXiv.elements['abstract'].text
+        entry[:arxivcategories] = arXiv.elements['categories'].text
+        entry[:primaryclass] = entry[:arxivcategories].split(/\s+/).first
+        entry[:author] = arXiv.get_elements('authors/author').map do |author|
+          "{#{author.elements['keyname'].text}}, {#{author.elements['forenames'].text}}"
         end.join(' and ')
         entry[:journal] = 'ArXiv e-prints'
         entry[:eprint] = entry[:arxiv]
         entry[:archiveprefix] = 'arXiv'
-        date = xml.xpath('//arXiv/updated').map(&:content).first || xml.xpath('//arXiv/created').map(&:content).first
-        date = Date.parse(date)
+        entry[:arxivcreated] = arXiv.elements['created'].text if arXiv.elements['created']
+        entry[:arxivupdated] = arXiv.elements['updated'].text if arXiv.elements['updated']
+        date = Date.parse(entry[:arxivupdated] || entry[:arxivcreated])
         entry[:year] = date.year
         entry[:month] = Literal.new(%w(jan feb mar apr may jun jul aug sep oct nov dec)[date.month - 1])
-        doi = xml.xpath('//arXiv/doi').map(&:content).first
-        entry[:doi] = doi if doi
-        journal = xml.xpath('//arXiv/journal-ref').map(&:content).first
-        entry[:journal] = journal if journal
-        comments = xml.xpath('//arXiv/comments').map(&:content).first
-        entry[:comments] = comments if comments
+        entry[:doi] = arXiv.elements['doi'].text if arXiv.elements['doi']
+        entry[:journal] = arXiv.elements['journal-ref'].text if arXiv.elements['journal-ref']
+        entry[:comments] = arXiv.elements['comments'].text if arXiv.elements['comments']
         entry[:url] = "http://arxiv.org/abs/#{entry[:arxiv]}"
       rescue => ex
         entry.delete(:arxiv)

data/lib/bibsync/bibliography.rb CHANGED

@@ -55,8 +55,13 @@ module BibSync
       raise 'No filename given' unless @file
       if @dirty
         @save_hook.call(self) if @save_hook
-        File.open("#{@file}.tmp", 'w') {|f| f.write(self) }
-        File.rename("#{@file}.tmp", @file)
+        tmpfile = "#{@file}.tmp"
+        begin
+          File.open(tmpfile, 'w') {|f| f.write(self) }
+          File.rename(tmpfile, @file)
+        ensure
+          File.unlink(tmpfile) rescue nil
+        end
         @dirty = false
         true
       else

data/lib/bibsync/command.rb CHANGED

@@ -83,14 +83,15 @@ module BibSync
     def process
       if @args.size != 0
-        error 'Too many arguments'
+        puts 'Too many arguments'
         puts @opts
         exit
       end
       if @options[:bib]
-        @options[:bib] = Bibliography.new(@options[:bib])
-        @options[:bib].save_hook = Transformer.new
+        bib = @options[:bib] = Bibliography.new(@options[:bib])
+        bib.save_hook = Transformer.new
+        at_exit { bib.save }
       end
       actions = []

data/lib/bibsync/utils.rb CHANGED

@@ -27,13 +27,7 @@ module BibSync
     end
     def fetch_xml(url, params = nil, headers = nil)
-      xml = Nokogiri::XML(fetch(url, params, headers))
-      xml.remove_namespaces!
-      xml
-    end
-    def fetch_html(url, params = nil, headers = nil)
-      Nokogiri::HTML(fetch(url, params, headers))
+      REXML::Document.new(fetch(url, params, headers)).root
     end
     def arxiv_id(arxiv, opts = {})

data/lib/bibsync/version.rb CHANGED

@@ -1,3 +1,3 @@
 module BibSync
-  VERSION = '0.0.5'
+  VERSION = '0.0.8'
 end

data/test/test_utils.rb CHANGED

@@ -24,13 +24,7 @@ describe BibSync::Utils do
   describe '#fetch_xml' do
     it 'fetches xml' do
-      fetch_xml('http://export.arxiv.org/oai2', verb: 'GetRecord', identifier: 'oai:arXiv.org:1208.2881', metadataPrefix: 'arXiv').must_be_instance_of Nokogiri::XML::Document
-    end
-  end
-  describe '#fetch_html' do
-    it 'fetches html' do
-      fetch_html('http://google.com').must_be_instance_of Nokogiri::HTML::Document
+      fetch_xml('http://export.arxiv.org/oai2', verb: 'GetRecord', identifier: 'oai:arXiv.org:1208.2881', metadataPrefix: 'arXiv').must_be_instance_of REXML::Element
     end
   end

metadata CHANGED

@@ -1,29 +1,15 @@
 --- !ruby/object:Gem::Specification
 name: bibsync
 version: !ruby/object:Gem::Version
-  version: 0.0.5
+  version: 0.0.8
 platform: ruby
 authors:
 - Daniel Mendler
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2013-07-04 00:00:00.000000000 Z
+date: 2013-10-08 00:00:00.000000000 Z
 dependencies:
-- !ruby/object:Gem::Dependency
-  name: nokogiri
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - '>='
-      - !ruby/object:Gem::Version
-        version: '0'
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - '>='
-      - !ruby/object:Gem::Version
-        version: '0'
 - !ruby/object:Gem::Dependency
   name: faraday
   requirement: !ruby/object:Gem::Requirement
@@ -81,7 +67,7 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 description: BibSync is a tool to synchronize scientific papers and BibTeX bibliography
-  files
+  files. It automatically downloads the metadata from dx.doi.org and arxiv.org.
 email:
 - mail@daniel-mendler.de
 executables:
@@ -135,7 +121,8 @@ files:
 - test/test_entry.rb
 - test/test_utils.rb
 homepage: https://github.com/minad/bibsync
-licenses: []
+licenses:
+- MIT
 metadata: {}
 post_install_message:
 rdoc_options: []