bookshark 1.0.0.alpha.3 → 1.0.0.alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: d1a505294880b3816fc6382b9d48035d0e780e85
-  data.tar.gz: 5cc5432c462fea9ab4781bf91bfa9cef9ffa2e2d
+  metadata.gz: 818b101a82314fcff676b111f3e407821a9f0dc5
+  data.tar.gz: 12dd9368013a7911b4ce8c3f799db90e42c80671
 SHA512:
-  metadata.gz: aeee746c931f9171c58c1594145cdaa91d89fda5de4671a4e9a5cf92b3e3d09bffaea863b555326b2224594b901b3859ffff8eacc0a078acd600fc9fee005a7c
-  data.tar.gz: f6f5061c6a636e4eb558508000cb572fd2cd34a59802146a45f6f9bea89f6fa30e14e13a1de54e49a067bae8217b6b9685bc6d124a0d1ac31c3fbb800ea738ab
+  metadata.gz: b937fcae31844c3742ff6ad00ef91e27a24fe2d8f67c2319fbafeab5db16e1fdc382e26fb37a9d1409ba446fefe8c4407b9cc3d7d95038da6533e09aac2a8900
+  data.tar.gz: 70932725282459f1b6c161517630c5a8edd364efca16d4f2e41b1170ebba721368b9fef14eab7d9fa710c2ebd2e099374d76f53ee15c40b9df0dfa99799eab0e
@@ -43,7 +43,12 @@ module Biblionet
 
        # puts JSON.pretty_generate(author_hash)
 
-        return @author = author_hash
+        if author_hash[:lastname].nil? and author_hash[:firstname].nil?
+          return nil
+        else
+          return @author = author_hash
+        end
+
      end
 
      def split_name(fullname)
@@ -81,7 +86,7 @@ module Biblionet
        if (content_re.match(document)).nil?
          puts document
        end
-        content = content_re.match(document)[0]
+        content = content_re.match(document)[0] unless (content_re.match(document)).nil?
 
        @nodeset = Nokogiri::HTML(content)
      end
@@ -9,12 +9,12 @@ module Biblionet
 
      def initialize(uri=nil)
        super(uri)
-        extract_book unless uri.nil?
+        extract_book unless uri.nil? or @page.nil?
      end
 
      def load_and_extract_book(uri=nil)
        load_page(uri)
-        extract_book unless uri.nil?
+        extract_book unless uri.nil? or @page.nil?
      end
 
      # Converts the parsed contributors string to hash.
@@ -116,6 +116,10 @@ module Biblionet
 
        page = BookDataExtractor.new(book_page)
 
+        # End extraction if BookDataExtractor couldnt create a nodeset
+        return nil if page.nodeset.nil?
+
+
        book_hash = Hash.new
 
        begin
@@ -200,9 +204,14 @@ module Biblionet
        if (content_re.match(document)).nil?
          puts document
        end
-        content = content_re.match(document)[0]
-
-        @nodeset = Nokogiri::HTML(content)
+        content = content_re.match(document)[0] unless (content_re.match(document)).nil?
+
+        # If content is nil, there is something wrong with the html, so return nil
+        if content.nil?
+          @nodeset = nil
+        else
+          @nodeset = Nokogiri::HTML(content)
+        end
      end
 
      def image
@@ -8,7 +8,7 @@ module Biblionet
 
      def initialize(uri=nil)
        super(uri)
-        extract_categories unless uri.nil?
+        extract_categories unless uri.nil? or @page.nil?
      end
 
      def extract_categories(category_page=@page)
@@ -43,15 +43,18 @@ module Biblionet
          category_hash = {biblionet_id => category.clone}
        end.reduce({}, :update) unless @page.nil?
 
-        @categories[:current] = (@categories[@biblionet_id.to_s].clone)
-        @categories[:current][:b_id] = @biblionet_id
-
-        return @categories
+        if present?(@categories)
+          @categories[:current] = (@categories[@biblionet_id.to_s].clone)
+          @categories[:current][:b_id] = @biblionet_id
+          return @categories
+        else
+          return nil
+        end
      end
 
      def extract_categories_from(uri=nil)
        load_page(uri)
-        extract_categories unless uri.nil?
+        extract_categories unless uri.nil? or @page.nil?
      end
 
 
@@ -21,6 +21,8 @@ module Biblionet
        puts "Extracting publisher: #{biblionet_id}"
        page = PublisherDataExtractor.new(publisher_page)
 
+        return nil if page.nodeset.nil?
+
        headquarters = page.headquarters
        bookstores = page.bookstores
        bookstores['Έδρα'] = headquarters
@@ -46,9 +48,14 @@ module Biblionet
        if (content_re.match(document)).nil?
          puts document
        end
-        content = content_re.match(document)[0]
-
-        @nodeset = Nokogiri::HTML(content)
+        content = content_re.match(document)[0] unless (content_re.match(document)).nil?
+
+        # If content is nil, there is something wrong with the html, so return nil
+        if content.nil?
+          @nodeset = nil
+        else
+          @nodeset = Nokogiri::HTML(content)
+        end
      end
 
      def name
@@ -1,3 +1,3 @@
 module Bookshark
-  VERSION = "1.0.0.alpha.3"
+  VERSION = "1.0.0.alpha.5"
 end
@@ -9,13 +9,37 @@ describe Bookshark::Extractor do
   let(:eager_book_184923) { JSON.pretty_generate(JSON.parse(open(File.expand_path("test_data/eager_book_184923.json", File.dirname(__FILE__))).read)) }
   let(:search_01) { JSON.pretty_generate(JSON.parse(open(File.expand_path("test_data/search_01.json" , File.dirname(__FILE__))).read)) }
   let(:search_ids_01) { JSON.pretty_generate(JSON.parse(open(File.expand_path("test_data/search_ids_01.json" , File.dirname(__FILE__))).read)) }
+  let(:empty_book) { JSON.pretty_generate(JSON.parse(open(File.expand_path("test_data/empty_book.json" , File.dirname(__FILE__))).read)) }
+  let(:empty_author) { JSON.pretty_generate(JSON.parse(open(File.expand_path("test_data/empty_author.json" , File.dirname(__FILE__))).read)) }
+  let(:empty_publisher) { JSON.pretty_generate(JSON.parse(open(File.expand_path("test_data/empty_publisher.json" , File.dirname(__FILE__))).read)) }
+  let(:empty_category) { JSON.pretty_generate(JSON.parse(open(File.expand_path("test_data/empty_category.json" , File.dirname(__FILE__))).read)) }
 
 
   describe '#author' do
     context 'from remote html source' do
-      it 'reads html from the web and extracts author data' do
-        expect(subject.author(id: 13219)).to eq author_13219
+      context 'when the author exists' do
+        it 'reads html from the web based on given id and extracts author data' do
+          expect(subject.author(id: 13219)).to eq author_13219
+        end
+        it 'reads html from the web based on given uri and extracts author data' do
+          expect(subject.author(uri: "http://biblionet.gr/author/13219")).to eq author_13219
+        end
       end
+      context 'when the author doesnt exist' do
+        it 'returns an empty array' do
+          expect(subject.author(id: 0)).to eq empty_author
+        end
+      end
+      context 'when no options are set' do
+        it 'returns an empty array' do
+          expect(subject.author).to eq empty_author
+        end
+      end
+      context 'when a the given uri is wrong' do
+        it 'returns an empty array' do
+          expect(subject.author(uri: "http://google.com")).to eq empty_author
+        end
+      end
     end
 
     context 'from local storage' do
@@ -28,41 +52,114 @@ describe Bookshark::Extractor do
 
   describe '#publisher' do
     context 'extract from remote html source' do
-      it 'reads html from the web and extracts publisher data' do
-        expect(subject.publisher(id: 20)).to eq publisher_20
+      context 'when the publisher exists' do
+        it 'reads html from the web based on given id and extracts publisher data' do
+          expect(subject.publisher(id: 20)).to eq publisher_20
+        end
+        it 'reads html from the web based on given uri and extracts publisher data' do
+          expect(subject.publisher(uri: "http://biblionet.gr/com/20")).to eq publisher_20
+        end
       end
+      context 'when the publisher doesnt exist' do
+        it 'returns an empty array' do
+          expect(subject.publisher(id: 0)).to eq empty_publisher
+        end
+      end
+      context 'when no options are set' do
+        it 'returns an empty array' do
+          expect(subject.publisher).to eq empty_publisher
+        end
+      end
+      context 'when a the given uri is wrong' do
+        it 'returns an empty array' do
+          expect(subject.publisher(uri: "http://google.com")).to eq empty_publisher
+        end
+      end
     end
   end
 
   describe '#category' do
     context 'extract from remote html source' do
-      it 'reads html from the web and extracts category data' do
-        expect(subject.category(id: 1041)).to eq category_1041
+      context 'when the category exists' do
+        it 'reads html from the web based on given id and extracts category data' do
+          expect(subject.category(id: 1041)).to eq category_1041
+        end
+        it 'reads html from the web based on given uri and extracts category data' do
+          expect(subject.category(uri: "http://biblionet.gr/index/1041")).to eq category_1041
+        end
       end
+      context 'when the category doesnt exist' do
+        it 'returns an empty array' do
+          expect(subject.category(id: 0)).to eq empty_category
+        end
+      end
+      context 'when no options are set' do
+        it 'returns an empty array' do
+          expect(subject.category).to eq empty_category
+        end
+      end
+      context 'when a the given uri is wrong' do
+        it 'returns an empty array' do
+          expect(subject.category(uri: "http://google.com")).to eq empty_category
+        end
+      end
     end
   end
 
   describe '#book' do
     context 'extract from remote html source' do
-      it 'reads html from the web and extracts book data' do
-        expect(subject.book(id: 103788)).to eq book_103788
-      end
-
-      it 'reads html from the web and eager extracts all book and reference data' do
-        expect(subject.book(id: 184923, eager: true)).to eq eager_book_184923
+      context 'when book exists' do
+        it 'reads html from the web based on given id and extracts book data' do
+          expect(subject.book(id: 103788)).to eq book_103788
+        end
+
+        it 'reads html from the web based on given uri and extracts book data' do
+          expect(subject.book(uri: "http://biblionet.gr/book/103788")).to eq book_103788
+        end
+
+        it 'reads html from the web and eager extracts all book and reference data' do
+          expect(subject.book(id: 184923, eager: true)).to eq eager_book_184923
+        end
       end
+      context 'when the book doesnt exist' do
+        it 'returns an empty array' do
+          expect(subject.book(id: 0)).to eq empty_book
+        end
+      end
+      context 'when no options are set' do
+        it 'returns an empty array' do
+          expect(subject.book).to eq empty_book
+        end
+      end
+      context 'when a the given uri is wrong' do
+        it 'returns an empty array' do
+          expect(subject.book(uri: "http://google.com")).to eq empty_book
+        end
+      end
     end
   end
 
   describe '#search' do
-    context 'extract from remote html source' do
-      it 'builds a search url and extracts book ids from search page' do
-        expect(subject.search(title: 'σημεια και τερατα', results_type: 'ids')).to eq search_ids_01
+    context 'search and extract from remote html source' do
+      context 'when books are found' do
+        it 'builds a search url and extracts book ids from search page' do
+          expect(subject.search(title: 'σημεια και τερατα', results_type: 'ids')).to eq search_ids_01
+        end
+
+        it 'builds a search url and extracts book data from search page' do
+          expect(subject.search(title: 'σημεια και τερατα', results_type: 'metadata')).to eq search_01
+        end
       end
-
-      it 'builds a search url and extracts book data from search page' do
-        expect(subject.search(title: 'σημεια και τερατα', results_type: 'metadata')).to eq search_01
-      end
+      context 'when no books are found' do
+        it 'returns an empty array' do
+          expect(subject.search(isbn: 'some-invalid-isbn')).to eq empty_book
+        end
+      end
+      context 'when no options are set' do
+        it 'returns an empty array' do
+          expect(subject.search).to eq empty_book
+        end
+      end
     end
   end
 
@@ -0,0 +1,5 @@
+{
+  "author": [
+
+  ]
+}
@@ -0,0 +1,5 @@
+{
+  "book": [
+
+  ]
+}
@@ -0,0 +1,5 @@
+{
+  "category": [
+
+  ]
+}
@@ -0,0 +1,5 @@
+{
+  "publisher": [
+
+  ]
+}
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: bookshark
 version: !ruby/object:Gem::Version
-  version: 1.0.0.alpha.3
+  version: 1.0.0.alpha.5
 platform: ruby
 authors:
 - Dimitris Klisiaris
@@ -149,6 +149,10 @@ files:
 - spec/test_data/book_103788.json
 - spec/test_data/category_1041.json
 - spec/test_data/eager_book_184923.json
+- spec/test_data/empty_author.json
+- spec/test_data/empty_book.json
+- spec/test_data/empty_category.json
+- spec/test_data/empty_publisher.json
 - spec/test_data/publisher_20.json
 - spec/test_data/search_01.json
 - spec/test_data/search_ids_01.json
@@ -186,6 +190,10 @@ test_files:
 - spec/test_data/book_103788.json
 - spec/test_data/category_1041.json
 - spec/test_data/eager_book_184923.json
+- spec/test_data/empty_author.json
+- spec/test_data/empty_book.json
+- spec/test_data/empty_category.json
+- spec/test_data/empty_publisher.json
 - spec/test_data/publisher_20.json
 - spec/test_data/search_01.json
 - spec/test_data/search_ids_01.json