RubyGems - bookshark - Versions diffs - 1.0.0.alpha.3 → 1.0.0.alpha.5 - Mend

bookshark 1.0.0.alpha.3 → 1.0.0.alpha.5

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

checksums.yaml +4 -4
data/lib/bookshark/extractors/author_extractor.rb +7 -2
data/lib/bookshark/extractors/book_extractor.rb +14 -5
data/lib/bookshark/extractors/category_extractor.rb +9 -6
data/lib/bookshark/extractors/publisher_extractor.rb +10 -3
data/lib/bookshark/version.rb +1 -1
data/spec/bookshark_spec.rb +116 -19
data/spec/test_data/empty_author.json +5 -0
data/spec/test_data/empty_book.json +5 -0
data/spec/test_data/empty_category.json +5 -0
data/spec/test_data/empty_publisher.json +5 -0
metadata +9 -1

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: d1a505294880b3816fc6382b9d48035d0e780e85
-  data.tar.gz: 5cc5432c462fea9ab4781bf91bfa9cef9ffa2e2d
+  metadata.gz: 818b101a82314fcff676b111f3e407821a9f0dc5
+  data.tar.gz: 12dd9368013a7911b4ce8c3f799db90e42c80671
 SHA512:
-  metadata.gz: aeee746c931f9171c58c1594145cdaa91d89fda5de4671a4e9a5cf92b3e3d09bffaea863b555326b2224594b901b3859ffff8eacc0a078acd600fc9fee005a7c
-  data.tar.gz: f6f5061c6a636e4eb558508000cb572fd2cd34a59802146a45f6f9bea89f6fa30e14e13a1de54e49a067bae8217b6b9685bc6d124a0d1ac31c3fbb800ea738ab
+  metadata.gz: b937fcae31844c3742ff6ad00ef91e27a24fe2d8f67c2319fbafeab5db16e1fdc382e26fb37a9d1409ba446fefe8c4407b9cc3d7d95038da6533e09aac2a8900
+  data.tar.gz: 70932725282459f1b6c161517630c5a8edd364efca16d4f2e41b1170ebba721368b9fef14eab7d9fa710c2ebd2e099374d76f53ee15c40b9df0dfa99799eab0e

data/lib/bookshark/extractors/author_extractor.rb CHANGED

@@ -43,7 +43,12 @@ module Biblionet
         # puts JSON.pretty_generate(author_hash)
-        return @author = author_hash
+        if author_hash[:lastname].nil? and author_hash[:firstname].nil?
+          return nil
+        else
+          return @author = author_hash
+        end
       end
       def split_name(fullname)
@@ -81,7 +86,7 @@ module Biblionet
         if (content_re.match(document)).nil?
           puts document
         end
-        content = content_re.match(document)[0]
+        content = content_re.match(document)[0] unless (content_re.match(document)).nil?
         @nodeset = Nokogiri::HTML(content)
       end

data/lib/bookshark/extractors/book_extractor.rb CHANGED

@@ -9,12 +9,12 @@ module Biblionet
       def initialize(uri=nil)
         super(uri)
-        extract_book unless uri.nil?
+        extract_book unless uri.nil? or @page.nil?
       end
       def load_and_extract_book(uri=nil)
         load_page(uri)
-        extract_book unless uri.nil?
+        extract_book unless uri.nil? or @page.nil?
       end
       # Converts the parsed contributors string to hash.
@@ -116,6 +116,10 @@ module Biblionet
         page = BookDataExtractor.new(book_page)
+        # End extraction if BookDataExtractor couldnt create a nodeset
+        return nil if page.nodeset.nil?
         book_hash = Hash.new
         begin
@@ -200,9 +204,14 @@ module Biblionet
         if (content_re.match(document)).nil?
           puts document
         end
-        content = content_re.match(document)[0]
-        @nodeset = Nokogiri::HTML(content)
+        content = content_re.match(document)[0] unless (content_re.match(document)).nil?
+        # If content is nil, there is something wrong with the html, so return nil
+        if content.nil?
+          @nodeset = nil
+        else
+          @nodeset = Nokogiri::HTML(content)
+        end
       end
       def image

data/lib/bookshark/extractors/category_extractor.rb CHANGED

@@ -8,7 +8,7 @@ module Biblionet
       def initialize(uri=nil)
         super(uri)
-        extract_categories unless uri.nil?
+        extract_categories unless uri.nil? or @page.nil?
       end
       def extract_categories(category_page=@page)
@@ -43,15 +43,18 @@ module Biblionet
           category_hash = {biblionet_id => category.clone}
         end.reduce({}, :update) unless @page.nil?
-        @categories[:current] = (@categories[@biblionet_id.to_s].clone)
-        @categories[:current][:b_id] = @biblionet_id
-        return @categories
+        if present?(@categories)
+          @categories[:current] = (@categories[@biblionet_id.to_s].clone)
+          @categories[:current][:b_id] = @biblionet_id
+          return @categories
+        else
+          return nil
+        end
       end
       def extract_categories_from(uri=nil)
         load_page(uri)
-        extract_categories unless uri.nil?
+        extract_categories unless uri.nil? or @page.nil?
       end

data/lib/bookshark/extractors/publisher_extractor.rb CHANGED

@@ -21,6 +21,8 @@ module Biblionet
         puts "Extracting publisher: #{biblionet_id}"
         page = PublisherDataExtractor.new(publisher_page)
+        return nil if page.nodeset.nil?
         headquarters                    = page.headquarters
         bookstores                      = page.bookstores
         bookstores['Έδρα']              = headquarters
@@ -46,9 +48,14 @@ module Biblionet
         if (content_re.match(document)).nil?
           puts document
         end
-        content = content_re.match(document)[0]
-        @nodeset = Nokogiri::HTML(content)
+        content = content_re.match(document)[0] unless (content_re.match(document)).nil?
+        # If content is nil, there is something wrong with the html, so return nil
+        if content.nil?
+          @nodeset = nil
+        else
+          @nodeset = Nokogiri::HTML(content)
+        end
       end
       def name

data/lib/bookshark/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Bookshark
-  VERSION = "1.0.0.alpha.3"
+  VERSION = "1.0.0.alpha.5"
 end

data/spec/bookshark_spec.rb CHANGED

@@ -9,13 +9,37 @@ describe Bookshark::Extractor do
   let(:eager_book_184923) { JSON.pretty_generate(JSON.parse(open(File.expand_path("test_data/eager_book_184923.json", File.dirname(__FILE__))).read)) }
   let(:search_01)         { JSON.pretty_generate(JSON.parse(open(File.expand_path("test_data/search_01.json"        , File.dirname(__FILE__))).read)) }
   let(:search_ids_01)     { JSON.pretty_generate(JSON.parse(open(File.expand_path("test_data/search_ids_01.json"    , File.dirname(__FILE__))).read)) }
+  let(:empty_book)        { JSON.pretty_generate(JSON.parse(open(File.expand_path("test_data/empty_book.json"       , File.dirname(__FILE__))).read)) }
+  let(:empty_author)      { JSON.pretty_generate(JSON.parse(open(File.expand_path("test_data/empty_author.json"     , File.dirname(__FILE__))).read)) }
+  let(:empty_publisher)   { JSON.pretty_generate(JSON.parse(open(File.expand_path("test_data/empty_publisher.json"  , File.dirname(__FILE__))).read)) }
+  let(:empty_category)    { JSON.pretty_generate(JSON.parse(open(File.expand_path("test_data/empty_category.json"   , File.dirname(__FILE__))).read)) }
   describe '#author' do
     context 'from remote html source' do
-      it 'reads html from the web and extracts author data' do
-        expect(subject.author(id: 13219)).to eq author_13219
+      context 'when the author exists' do
+        it 'reads html from the web based on given id and extracts author data' do
+          expect(subject.author(id: 13219)).to eq author_13219
+        end
+        it 'reads html from the web based on given uri and extracts author data' do
+          expect(subject.author(uri: "http://biblionet.gr/author/13219")).to eq author_13219
+        end
       end
+      context 'when the author doesnt exist' do
+        it 'returns an empty array' do
+          expect(subject.author(id: 0)).to eq empty_author
+        end
+      end
+      context 'when no options are set' do
+        it 'returns an empty array' do
+          expect(subject.author).to eq empty_author
+        end
+      end
+      context 'when a the given uri is wrong' do
+        it 'returns an empty array' do
+          expect(subject.author(uri: "http://google.com")).to eq empty_author
+        end
+      end
     end
     context 'from local storage' do
@@ -28,41 +52,114 @@ describe Bookshark::Extractor do
   describe '#publisher' do
     context 'extract from remote html source' do
-      it 'reads html from the web and extracts publisher data' do
-        expect(subject.publisher(id: 20)).to eq publisher_20
+      context 'when the publisher exists' do
+        it 'reads html from the web based on given id and extracts publisher data' do
+          expect(subject.publisher(id: 20)).to eq publisher_20
+        end
+        it 'reads html from the web based on given uri and extracts publisher data' do
+          expect(subject.publisher(uri: "http://biblionet.gr/com/20")).to eq publisher_20
+        end
       end
+      context 'when the publisher doesnt exist' do
+        it 'returns an empty array' do
+          expect(subject.publisher(id: 0)).to eq empty_publisher
+        end
+      end
+      context 'when no options are set' do
+        it 'returns an empty array' do
+          expect(subject.publisher).to eq empty_publisher
+        end
+      end
+      context 'when a the given uri is wrong' do
+        it 'returns an empty array' do
+          expect(subject.publisher(uri: "http://google.com")).to eq empty_publisher
+        end
+      end
     end
   end
   describe '#category' do
     context 'extract from remote html source' do
-      it 'reads html from the web and extracts category data' do
-        expect(subject.category(id: 1041)).to eq category_1041
+      context 'when the category exists' do
+        it 'reads html from the web based on given id and extracts category data' do
+          expect(subject.category(id: 1041)).to eq category_1041
+        end
+        it 'reads html from the web based on given uri and extracts category data' do
+          expect(subject.category(uri: "http://biblionet.gr/index/1041")).to eq category_1041
+        end
       end
+      context 'when the category doesnt exist' do
+        it 'returns an empty array' do
+          expect(subject.category(id: 0)).to eq empty_category
+        end
+      end
+      context 'when no options are set' do
+        it 'returns an empty array' do
+          expect(subject.category).to eq empty_category
+        end
+      end
+      context 'when a the given uri is wrong' do
+        it 'returns an empty array' do
+          expect(subject.category(uri: "http://google.com")).to eq empty_category
+        end
+      end
     end
   end
   describe '#book' do
     context 'extract from remote html source' do
-      it 'reads html from the web and extracts book data' do
-        expect(subject.book(id: 103788)).to eq book_103788
-      end
-      it 'reads html from the web and eager extracts all book and reference data' do
-        expect(subject.book(id: 184923, eager: true)).to eq eager_book_184923
+      context 'when book exists' do
+        it 'reads html from the web based on given id and extracts book data' do
+          expect(subject.book(id: 103788)).to eq book_103788
+        end
+        it 'reads html from the web based on given uri and extracts book data' do
+          expect(subject.book(uri: "http://biblionet.gr/book/103788")).to eq book_103788
+        end
+        it 'reads html from the web and eager extracts all book and reference data' do
+          expect(subject.book(id: 184923, eager: true)).to eq eager_book_184923
+        end
       end
+      context 'when the book doesnt exist' do
+        it 'returns an empty array' do
+          expect(subject.book(id: 0)).to eq empty_book
+        end
+      end
+      context 'when no options are set' do
+        it 'returns an empty array' do
+          expect(subject.book).to eq empty_book
+        end
+      end
+      context 'when a the given uri is wrong' do
+        it 'returns an empty array' do
+          expect(subject.book(uri: "http://google.com")).to eq empty_book
+        end
+      end
     end
   end
   describe '#search' do
-    context 'extract from remote html source' do
-      it 'builds a search url and extracts book ids from search page' do
-        expect(subject.search(title: 'σημεια και τερατα', results_type: 'ids')).to eq search_ids_01
+    context 'search and extract from remote html source' do
+      context 'when books are found' do
+        it 'builds a search url and extracts book ids from search page' do
+          expect(subject.search(title: 'σημεια και τερατα', results_type: 'ids')).to eq search_ids_01
+        end
+        it 'builds a search url and extracts book data from search page' do
+          expect(subject.search(title: 'σημεια και τερατα', results_type: 'metadata')).to eq search_01
+        end
       end
-      it 'builds a search url and extracts book data from search page' do
-        expect(subject.search(title: 'σημεια και τερατα', results_type: 'metadata')).to eq search_01
-      end
+      context 'when no books are found' do
+        it 'returns an empty array' do
+          expect(subject.search(isbn: 'some-invalid-isbn')).to eq empty_book
+        end
+      end
+      context 'when no options are set' do
+        it 'returns an empty array' do
+          expect(subject.search).to eq empty_book
+        end
+      end
     end
   end

data/spec/test_data/empty_author.json ADDED

@@ -0,0 +1,5 @@
+{
+  "author": [
+  ]
+}

data/spec/test_data/empty_book.json ADDED

@@ -0,0 +1,5 @@
+{
+  "book": [
+  ]
+}

data/spec/test_data/empty_category.json ADDED

@@ -0,0 +1,5 @@
+{
+  "category": [
+  ]
+}

data/spec/test_data/empty_publisher.json ADDED

@@ -0,0 +1,5 @@
+{
+  "publisher": [
+  ]
+}

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: bookshark
 version: !ruby/object:Gem::Version
-  version: 1.0.0.alpha.3
+  version: 1.0.0.alpha.5
 platform: ruby
 authors:
 - Dimitris Klisiaris
@@ -149,6 +149,10 @@ files:
 - spec/test_data/book_103788.json
 - spec/test_data/category_1041.json
 - spec/test_data/eager_book_184923.json
+- spec/test_data/empty_author.json
+- spec/test_data/empty_book.json
+- spec/test_data/empty_category.json
+- spec/test_data/empty_publisher.json
 - spec/test_data/publisher_20.json
 - spec/test_data/search_01.json
 - spec/test_data/search_ids_01.json
@@ -186,6 +190,10 @@ test_files:
 - spec/test_data/book_103788.json
 - spec/test_data/category_1041.json
 - spec/test_data/eager_book_184923.json
+- spec/test_data/empty_author.json
+- spec/test_data/empty_book.json
+- spec/test_data/empty_category.json
+- spec/test_data/empty_publisher.json
 - spec/test_data/publisher_20.json
 - spec/test_data/search_01.json
 - spec/test_data/search_ids_01.json