bookshark 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d35fee946c6b6dcf4ca740d89ba3a9cb89f36a94
4
- data.tar.gz: ff928cdadd16b132adc9f193ff5c7f565a0b0398
3
+ metadata.gz: '03852f73c9246676ff20b75b0a893998e967f2c7'
4
+ data.tar.gz: 7fe938710c2e9344563395e5db7cf3e7eaf76b25
5
5
  SHA512:
6
- metadata.gz: 2dac9ad4842172d896a488fa60baaf13d862c8d2e3b1e68b3a9f5e6ee28bec5136d4ed7d084c4bf6fa0f5f1cfd3224241958467c7df99929ea8cfc1c5ad92abc
7
- data.tar.gz: e8f9dcb4f20e0a2330a91588c6dfbb306a74820ee9ed7fa7564097013c16244de46ffc3a636a751126f19ce9c1a32b209fb254d7533d901d6f77a00a6b8b5100
6
+ metadata.gz: 3ccd4cfd0e82aa6918304e82df18f382f2b7e33470c9f52ff362b4897aaf53b703a04bd2689871925293d7e084d3e0008c74aa289fb36a106be7dca20c01ee0f
7
+ data.tar.gz: fb4590ee8c1a24402f48b3502ddb77e24fbbde2c988a18fb62fe867762950f351d3271512773602bd9cec47d7e51a6cb94de1e2d695cf7c0d8704a39bc72f905
@@ -24,9 +24,11 @@ Gem::Specification.new do |spec|
24
24
  spec.add_dependency "sanitize", "~> 4.0"
25
25
  spec.add_dependency "json", "~> 1.8"
26
26
  spec.add_dependency "htmlentities", "~> 4.3"
27
+ spec.add_dependency "marc", "~> 1.0"
27
28
 
28
29
  spec.add_development_dependency "bundler", ">= 1.6"
29
30
  spec.add_development_dependency "rake", "~> 10.0"
30
31
  spec.add_development_dependency 'rspec', "~> 3.2"
31
32
  spec.add_development_dependency "webmock", "~> 1.2"
33
+ spec.add_development_dependency "pry-byebug", "~> 3.4"
32
34
  end
@@ -7,6 +7,7 @@ require 'bookshark/extractors/book_extractor'
7
7
  require 'bookshark/extractors/bibliographical_book_extractor'
8
8
  require 'bookshark/extractors/publisher_extractor'
9
9
  require 'bookshark/extractors/search'
10
+ require 'bookshark/extractors/nlg/book_extractor'
10
11
 
11
12
  require 'bookshark/crawlers/base'
12
13
  require 'bookshark/crawlers/publisher_crawler'
@@ -22,8 +23,8 @@ module Bookshark
22
23
  def self.root
23
24
  # File.dirname __dir__ # Works only on ruby > 2.0.0
24
25
  File.expand_path(File.join(File.dirname(__FILE__), '../'))
25
- end
26
-
26
+ end
27
+
27
28
  def self.path_to_storage
28
29
  File.join root, 'lib/bookshark/storage'
29
30
  end
@@ -31,7 +32,7 @@ module Bookshark
31
32
 
32
33
  class Extractor
33
34
  include FileManager
34
- attr_accessor :site, :format
35
+ attr_accessor :site, :format
35
36
 
36
37
  def initialize(options = {})
37
38
  options = DEFAULTS.merge(options)
@@ -44,9 +45,9 @@ module Bookshark
44
45
  options[:format] ||= @format
45
46
 
46
47
  author_extractor = Biblionet::Extractors::AuthorExtractor.new
47
- author = author_extractor.load_and_extract_author(uri)
48
-
49
- response = {}
48
+ author = author_extractor.load_and_extract_author(uri)
49
+
50
+ response = {}
50
51
  response[:author] = !author.nil? ? [author] : []
51
52
  response = change_format(response, options[:format])
52
53
  return response
@@ -58,90 +59,109 @@ module Bookshark
58
59
 
59
60
  publisher_extractor = Biblionet::Extractors::PublisherExtractor.new
60
61
  publisher = publisher_extractor.load_and_extract_publisher(uri)
61
-
62
- response = {}
62
+
63
+ response = {}
63
64
  response[:publisher] = !publisher.nil? ? [publisher] : []
64
65
  response = change_format(response, options[:format])
65
66
  response = publisher_extractor.decode_text(response)
66
67
 
67
68
  return response
68
- # return uri
69
- end
69
+ # return uri
70
+ end
70
71
 
71
72
  def book(options = {})
72
- book_extractor = Biblionet::Extractors::BookExtractor.new
73
-
74
- if book_extractor.present?(options[:isbn])
75
- search_engine = Biblionet::Extractors::Search.new
76
- options[:id] = search_engine.search_by_isbn(options[:isbn])
77
- end
73
+ options[:site] ||= @site
78
74
 
79
- uri = process_options(options, __method__)
80
- options[:format] ||= @format
81
- options[:eager] ||= false
82
- options[:nilify] ||= false
83
-
84
- if options[:eager]
85
- book = eager_extract_book(uri)
86
- else
87
- book = book_extractor.load_and_extract_book(uri)
88
- end
75
+ if options[:site] == 'biblionet'
76
+ book_extractor = Biblionet::Extractors::BookExtractor.new
89
77
 
90
- response = {}
91
- response[:book] = !book.nil? ? [book] : []
78
+ if book_extractor.present?(options[:isbn])
79
+ search_engine = Biblionet::Extractors::Search.new
80
+ options[:id] = search_engine.search_by_isbn(options[:isbn])
81
+ end
92
82
 
93
- return nil if response[:book].empty? and options[:nilify]
94
-
95
- response = change_format(response, options[:format])
96
-
97
- response = book_extractor.decode_text(response) if response.class == "String"
98
-
99
- return response
83
+ uri = process_options(options, __method__)
84
+ options[:format] ||= @format
85
+ options[:eager] ||= false
86
+ options[:nilify] ||= false
87
+
88
+ if options[:eager]
89
+ book = eager_extract_book(uri)
90
+ else
91
+ book = book_extractor.load_and_extract_book(uri)
92
+ end
93
+
94
+ response = {}
95
+ response[:book] = !book.nil? ? [book] : []
96
+
97
+ return nil if response[:book].empty? and options[:nilify]
98
+
99
+ response = change_format(response, options[:format])
100
+
101
+ response = book_extractor.decode_text(response) if response.class == "String"
102
+
103
+ return response
104
+ elsif options[:site] == 'nlg'
105
+ book_extractor = Nlg::Extractors::BookExtractor.new
106
+
107
+ options[:format] ||= @format
108
+
109
+ # if !options[:uri].nil?
110
+ # uri = "#{options[:uri]}/Export?style=MARCXML"
111
+ # elsif !options[:id].nil?
112
+ # uri = "http://nbib.nlg.gr/Record/#{options[:id]}/Export?style=MARCXML"
113
+ # end
114
+
115
+ book = book_extractor.load_and_extract_book(options[:id])
116
+
117
+ response = {}
118
+ response[:book] = !book.nil? ? [book] : []
119
+ end
100
120
  end
101
121
 
102
-
122
+
103
123
  # def bibliographical_book(options = {})
104
124
  # bibliographical_book_extractor = Biblionet::Extractors::BibliographicalBookExtractor.new
105
-
125
+
106
126
  # uri = "http://www.biblionet.gr/main.asp?page=results&Titlesid=#{options[:id]}"
107
127
  # options[:format] ||= @format
108
-
128
+
109
129
  # book = bibliographical_book_extractor.load_and_extract_book(uri)
110
-
111
- # response = {}
130
+
131
+ # response = {}
112
132
  # response[:book] = !book.nil? ? [book] : []
113
133
  # response = change_format(response, options[:format])
114
- # response = bibliographical_book_extractor.decode_text(response)
115
- # end
116
-
134
+ # response = bibliographical_book_extractor.decode_text(response)
135
+ # end
136
+
117
137
  # puts Bookshark::Extractor.new(format: 'pretty_json').bibliographical_book(id: 103788)
118
138
 
119
139
  def category(options = {})
120
140
  uri = process_options(options, __method__)
121
- options[:format] ||= @format
141
+ options[:format] ||= @format
122
142
 
123
143
  category_extractor = Biblionet::Extractors::CategoryExtractor.new
124
144
  category = category_extractor.extract_categories_from(uri)
125
145
 
126
- response = {}
146
+ response = {}
127
147
  response[:category] = !category.nil? ? [category] : []
128
148
  response = change_format(response, options[:format])
129
-
130
- return response
149
+
150
+ return response
131
151
  end
132
152
 
133
153
  def search(options = {})
134
154
  options[:format] ||= @format
135
- options[:results_type] ||= 'metadata'
155
+ options[:results_type] ||= 'metadata'
136
156
 
137
157
  search_engine = Biblionet::Extractors::Search.new
138
158
  search_results = search_engine.perform_search(options)
139
159
 
140
- response = {}
160
+ response = {}
141
161
  response[:book] = search_results
142
162
  response = change_format(response, options[:format])
143
-
144
- return response
163
+
164
+ return response
145
165
  end
146
166
 
147
167
  # def books_from_storage
@@ -165,22 +185,22 @@ module Bookshark
165
185
  record = book(id: book_id, local: true, format: format, nilify: true)
166
186
 
167
187
  dir_to_save = Bookshark.path_to_storage + '/' + 'json_book_records/' + "#{((book_id-1)/1000)}/" + "book_#{book_id}.json"
168
-
188
+
169
189
  save_to(dir_to_save, record) unless record.nil?
170
190
  end
171
191
  end
172
192
 
173
193
 
174
- def extract_from_storage_and_save(metadata_type, source_dir, target_dir)
194
+ def extract_from_storage_and_save(metadata_type, source_dir, target_dir)
175
195
  list_directories(path: Bookshark.path_to_storage + '/' + source_dir).each do |dir|
176
- dir_to_save = dir.gsub(source_dir, target_dir)
196
+ dir_to_save = dir.gsub(source_dir, target_dir)
177
197
 
178
198
  list_files(path: dir, extension: 'html', all:true).each do |file|
179
- puts "Extracting from file: " + file.to_s
199
+ puts "Extracting from file: " + file.to_s
180
200
 
181
201
  # Extract publisher metadata form local file.
182
- options = {uri: file, format: 'pretty_json', local: true}
183
-
202
+ options = {uri: file, format: 'pretty_json', local: true}
203
+
184
204
  case metadata_type
185
205
  when 'author'
186
206
  record = author(options)
@@ -189,16 +209,16 @@ module Bookshark
189
209
  # when 'book'
190
210
  # record = book(options)
191
211
  when 'category'
192
- record = category(options)
193
- end
212
+ record = category(options)
213
+ end
194
214
 
195
215
  # Prepare a path to save the new file.
196
216
  filename = File.basename(file,".*")
197
217
  path_to_save = "#{dir_to_save}#{filename}.json"
198
-
199
- # Save to file.
218
+
219
+ # Save to file.
200
220
  save_to("#{path_to_save}", record)
201
-
221
+
202
222
  end # unless File.directory?(dir_to_save) # if dir.end_with? '/195/'
203
223
  end
204
224
  end
@@ -209,9 +229,9 @@ module Bookshark
209
229
  # end
210
230
  category_extractor = Biblionet::Extractors::CategoryExtractor.new
211
231
  all_categories = Hash.new
212
-
232
+
213
233
  list_files(path: 'storage/raw_ddc_pages', extension: 'html', all:true).each do |file|
214
- categories = category_extractor.extract_categories_from(file)
234
+ categories = category_extractor.extract_categories_from(file)
215
235
  all_categories.merge!(categories) unless categories.nil? or categories.empty?
216
236
  end
217
237
 
@@ -228,19 +248,19 @@ module Bookshark
228
248
 
229
249
  list_directories(path: 'storage/raw_html_pages').each do |dir|
230
250
  dir_to_save = dir.gsub(/raw_html_pages/, 'books')
231
-
232
- list_files(path: dir, extension: 'html', all:true).each do |file|
233
-
251
+
252
+ list_files(path: dir, extension: 'html', all:true).each do |file|
253
+
234
254
  # Load the book from html file and parse the data.
235
255
  # pp "Parsing book: #{file}"
236
256
  pp file
237
257
  book = bp.load_and_extract_book(file)
238
-
258
+
239
259
  # Prepare a path to save the new file.
240
260
  filename = File.basename(file,".*")
241
261
  path_to_save = "#{dir_to_save}#{filename}.json"
242
-
243
- # Save to file.
262
+
263
+ # Save to file.
244
264
  bp.save_to("#{path_to_save}", JSON.pretty_generate(book))
245
265
  # pp "Book #{file} saved!"
246
266
  end unless File.directory?(dir_to_save) # if dir.end_with? '/195/'
@@ -266,11 +286,11 @@ module Bookshark
266
286
  url_method = 'book'
267
287
  local_path = "html_book_pages/#{((id-1)/1000)}/book_#{id}.html"
268
288
  when 'category'
269
- url_method = 'index'
270
- local_path = "html_ddc_pages/#{((id-1)/1000)}/ddc_#{id}.html"
289
+ url_method = 'index'
290
+ local_path = "html_ddc_pages/#{((id-1)/1000)}/ddc_#{id}.html"
271
291
  else
272
292
  puts "Called from unknown method. Probably its rspec."
273
- end
293
+ end
274
294
 
275
295
  options[:local] ||= false
276
296
  url = "#{Bookshark::path_to_storage}/#{local_path}" if options[:local]
@@ -279,7 +299,7 @@ module Bookshark
279
299
  uri = options[:uri] ||= url
280
300
 
281
301
  return uri
282
- end
302
+ end
283
303
 
284
304
  def change_format(hash, format)
285
305
  case format
@@ -288,10 +308,10 @@ module Bookshark
288
308
  when 'json'
289
309
  hash = hash.to_json
290
310
  when 'pretty_json'
291
- hash = JSON.pretty_generate(hash)
311
+ hash = JSON.pretty_generate(hash)
292
312
  end
293
313
  return hash
294
- end
314
+ end
295
315
 
296
316
  def eager_extract_book(uri)
297
317
  book_extractor = Biblionet::Extractors::BookExtractor.new
@@ -301,13 +321,13 @@ module Bookshark
301
321
 
302
322
  book = book_extractor.load_and_extract_book(uri)
303
323
 
304
- tmp_data = []
324
+ tmp_data = []
305
325
  book[:author].each do |author|
306
- tmp_data << author_extractor.load_and_extract_author("http://www.biblionet.gr/author/#{author[:b_id]}")
326
+ tmp_data << author_extractor.load_and_extract_author("http://www.biblionet.gr/author/#{author[:b_id]}")
307
327
  end
308
- book[:author] = tmp_data
309
-
310
- tmp_data, tmp_hash = [], {}
328
+ book[:author] = tmp_data
329
+
330
+ tmp_data, tmp_hash = [], {}
311
331
  book[:contributors].each do |job, contributors|
312
332
  contributors.each do |contributor|
313
333
  tmp_data << author_extractor.load_and_extract_author("http://www.biblionet.gr/author/#{contributor[:b_id]}")
@@ -317,19 +337,19 @@ module Bookshark
317
337
  end
318
338
  book[:contributors] = tmp_hash
319
339
 
320
- tmp_data, tmp_hash = [], {}
340
+ tmp_data, tmp_hash = [], {}
321
341
  book[:category].each do |category|
322
342
  tmp_data << category_extractor.extract_categories_from("http://www.biblionet.gr/index/#{category[:b_id]}")
323
343
  end
324
- book[:category] = tmp_data
325
-
326
- tmp_data = []
327
- tmp_data << publisher_extractor.load_and_extract_publisher("http://www.biblionet.gr/com/#{book[:publisher][:b_id]}")
344
+ book[:category] = tmp_data
345
+
346
+ tmp_data = []
347
+ tmp_data << publisher_extractor.load_and_extract_publisher("http://www.biblionet.gr/com/#{book[:publisher][:b_id]}")
328
348
  book[:publisher] = tmp_data
329
349
 
330
350
  book
331
- end
332
-
351
+ end
352
+
333
353
  end
334
354
 
335
355
 
@@ -339,7 +359,7 @@ module Bookshark
339
359
 
340
360
  def initialize(options = {})
341
361
  options = DEFAULTS.merge(options)
342
- @site = options[:site]
362
+ @site = options[:site]
343
363
  end
344
364
 
345
365
  def publishers
@@ -362,11 +382,11 @@ module Bookshark
362
382
  crawler.crawl_and_save
363
383
  end
364
384
 
365
- end
385
+ end
366
386
 
367
387
  # module Biblionet
368
388
  # class Extract
369
- # class << self
389
+ # class << self
370
390
  # def author(uri=nil)
371
391
  # author_extractor = BiblionetParser::Core::AuthorExtractor.new
372
392
  # author_extractor.load_and_extract_author(uri)
@@ -384,7 +404,7 @@ module Bookshark
384
404
 
385
405
  # end
386
406
  # end
387
- # end
407
+ # end
388
408
  end
389
409
 
390
410
 
@@ -467,4 +487,4 @@ end
467
487
  # Problematic at biblionet
468
488
  # http://biblionet.gr/book/196388
469
489
  # http://biblionet.gr/book/196386
470
- # http://biblionet.gr/book/195525
490
+ # http://biblionet.gr/book/195525
@@ -170,10 +170,17 @@ module Biblionet
170
170
  text: publisher_node.text,
171
171
  b_id: (publisher_node[:href].split("/"))[2]
172
172
  }
173
- after_last_author_text = @nodeset
173
+ last_author = @nodeset
174
174
  .xpath("//a[@class='booklink' and @href[contains(.,'/author/') ]][last()]").last
175
- .next_sibling.text.strip
176
- puts after_last_author_text
175
+
176
+ if !last_author.nil? && !last_author.empty?
177
+ after_last_author_text = last_author.next_sibling.text.strip
178
+ else
179
+ last_book = @nodeset
180
+ .xpath("//a[@class='booklink' and @href[contains(.,'/book/') ]][last()]").last
181
+ after_last_author_text = last_book.next_sibling.text.strip
182
+ end
183
+
177
184
  details_hash[:publication] = {
178
185
  year: after_last_author_text[/(?<=: )\d+(?=\.)/],
179
186
  version: after_last_author_text[/(?<=- )\d+(?=η)/],
@@ -0,0 +1,110 @@
1
+ #!/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'rubygems'
5
+ require 'json'
6
+ require 'logger'
7
+ require 'pp'
8
+ require 'marc'
9
+ require 'htmlentities'
10
+
11
+ module Nlg
12
+ module Extractors
13
+
14
+ class Base
15
+
16
+ attr_reader :url, :nlg_id, :page
17
+
18
+ def initialize(id=nil)
19
+ load_page(id)
20
+ end
21
+
22
+ def load_page(id=nil)
23
+ load_page_by_id(id) unless id.nil?
24
+ end
25
+
26
+ def load_page_by_id(id)
27
+ begin
28
+ @nlg_id = id unless id.nil? # id is expected to be the last number.
29
+ @url = "http://nbib.nlg.gr/Record/#{@nlg_id}/Export?style=MARCXML"
30
+
31
+ pp "Downloading page: #{@url}"
32
+
33
+ Net::HTTP.start("nbib.nlg.gr") do |http|
34
+ response = http.get("/Record/#{@nlg_id}/Export?style=MARCXML")
35
+ pp response.content_type
36
+ pp response.code
37
+ raise EmptyPageError.new(@url) unless response.content_type == "text/xml" && response.code == "200"
38
+
39
+ @page = response.body
40
+ end
41
+
42
+ rescue Errno::ENOENT => e
43
+ pp "Page: #{@url} NOT FOUND."
44
+ pp e
45
+ rescue EmptyPageError => e
46
+ pp "Page: #{@url} is EMPTY."
47
+ pp e
48
+ @page = nil
49
+ rescue OpenURI::HTTPError => e
50
+ pp e
51
+ pp e.io.status
52
+ rescue StandardError => e
53
+ pp "Generic error #{e.class}. Will wait for 2 minutes and then try again."
54
+ pp e
55
+ sleep(120)
56
+ retry
57
+ end
58
+ end
59
+
60
+ # Decodes text with escaped html entities and returns the decoded text.
61
+ #
62
+ # ==== Params:
63
+ #
64
+ # +encoded_text+:: the text which contains encoded entities
65
+ #
66
+ def decode_text(encoded_text)
67
+ self.class.decode_text(encoded_text)
68
+ end
69
+
70
+ def self.decode_text(encoded_text)
71
+ # encoded_text = File.read(encoded_file_path)
72
+ coder = HTMLEntities.new
73
+ coder.decode(encoded_text)
74
+ end
75
+
76
+ def present?(value)
77
+ return (not value.nil? and not value.empty?) ? true : false
78
+ end
79
+
80
+ end
81
+
82
+ # Raised when a page is considered empty.
83
+ #
84
+ class EmptyPageError < StandardError
85
+ attr_reader :url
86
+
87
+ def initialize(url)
88
+ @url = url
89
+
90
+ msg = "Page: #{url} is not valid xml so it is considered EMPTY."
91
+ super(msg)
92
+ end
93
+ end
94
+
95
+ # Raised when something unexpected or in wrong format is parsed.
96
+ #
97
+ class NoIdeaWhatThisIsError < StandardError
98
+ attr_reader :nlg_id, :the_unexpected
99
+
100
+ def initialize(nlg_id, the_unexpected)
101
+ @nlg_id = nlg_id
102
+ @the_unexpected = the_unexpected
103
+
104
+ msg = "We have no idea what this: #{the_unexpected} is. At book #{nlg_id}"
105
+ super(msg)
106
+ end
107
+ end
108
+
109
+ end
110
+ end
@@ -0,0 +1,28 @@
1
+ #!/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require_relative 'base'
5
+
6
+ module Nlg
7
+ module Extractors
8
+
9
+ class BookExtractor < Base
10
+ attr_reader :book
11
+
12
+ def initialize(id=nil)
13
+ super(id)
14
+ extract_book unless id.nil? or @page.nil?
15
+ end
16
+
17
+ def load_and_extract_book(id=nil)
18
+ load_page(id)
19
+ extract_book unless id.nil? or @page.nil?
20
+ end
21
+
22
+ def extract_book(nlg_id=@nlg_id, book_page=@page)
23
+ puts "should extract book #{nlg_id} from nlg"
24
+ end
25
+
26
+ end
27
+ end
28
+ end
@@ -1,3 +1,3 @@
1
1
  module Bookshark
2
- VERSION = "1.0.1"
2
+ VERSION = "1.0.3"
3
3
  end
@@ -42,10 +42,10 @@
42
42
  "name": "Πανεπιστημιακές Εκδόσεις Κρήτης",
43
43
  "owner": "Στέφανος Τραχανάς",
44
44
  "bookstores": {
45
- "Υποκατάστημα": {
45
+ "&Upsilon;&pi;&omicron;&kappa;&alpha;&tau;&#940;&sigma;&tau;&eta;&mu;&alpha;": {
46
46
  "address": [
47
- "Κλεισόβης 3",
48
- "106 77 Αθήνα"
47
+ "&Kappa;&lambda;&epsilon;&iota;&sigma;&#972;&beta;&eta;&sigmaf; 3",
48
+ "106 77 &Alpha;&theta;&#942;&nu;&alpha;"
49
49
  ],
50
50
  "telephone": [
51
51
  "210 38490203"
@@ -372,7 +372,7 @@
372
372
  "format": "Βιβλίο",
373
373
  "original_language": null,
374
374
  "original_title": null,
375
- "price": "6,85",
375
+ "price": "6,82",
376
376
  "availability": "Κυκλοφορεί",
377
377
  "last_update": null,
378
378
  "series": {
@@ -421,7 +421,7 @@
421
421
  "format": "Βιβλίο",
422
422
  "original_language": null,
423
423
  "original_title": null,
424
- "price": "3,73",
424
+ "price": "3,71",
425
425
  "availability": "Κυκλοφορεί",
426
426
  "last_update": null,
427
427
  "series": {
@@ -445,7 +445,7 @@
445
445
  "contributors": {
446
446
  },
447
447
  "publisher": {
448
- "text": "Δωδώνη Εκδοτική ΕΠΕ",
448
+ "text": "Δωδώνη",
449
449
  "b_id": "1"
450
450
  },
451
451
  "isbn": "960-248-541-8",
@@ -470,7 +470,7 @@
470
470
  "format": "Βιβλίο",
471
471
  "original_language": null,
472
472
  "original_title": null,
473
- "price": "10,60",
473
+ "price": "10,55",
474
474
  "availability": "Κυκλοφορεί",
475
475
  "last_update": null,
476
476
  "series": {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bookshark
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitris Klisiaris
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-23 00:00:00.000000000 Z
11
+ date: 2017-02-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -72,6 +72,20 @@ dependencies:
72
72
  - - "~>"
73
73
  - !ruby/object:Gem::Version
74
74
  version: '4.3'
75
+ - !ruby/object:Gem::Dependency
76
+ name: marc
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '1.0'
82
+ type: :runtime
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '1.0'
75
89
  - !ruby/object:Gem::Dependency
76
90
  name: bundler
77
91
  requirement: !ruby/object:Gem::Requirement
@@ -128,6 +142,20 @@ dependencies:
128
142
  - - "~>"
129
143
  - !ruby/object:Gem::Version
130
144
  version: '1.2'
145
+ - !ruby/object:Gem::Dependency
146
+ name: pry-byebug
147
+ requirement: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - "~>"
150
+ - !ruby/object:Gem::Version
151
+ version: '3.4'
152
+ type: :development
153
+ prerelease: false
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - "~>"
157
+ - !ruby/object:Gem::Version
158
+ version: '3.4'
131
159
  description: Extracts book, author, publisher and category metadata from biblionet.gr.
132
160
  email:
133
161
  - dklisiaris@gmail.com
@@ -155,6 +183,8 @@ files:
155
183
  - lib/bookshark/extractors/bibliographical_book_extractor.rb
156
184
  - lib/bookshark/extractors/book_extractor.rb
157
185
  - lib/bookshark/extractors/category_extractor.rb
186
+ - lib/bookshark/extractors/nlg/base.rb
187
+ - lib/bookshark/extractors/nlg/book_extractor.rb
158
188
  - lib/bookshark/extractors/publisher_extractor.rb
159
189
  - lib/bookshark/extractors/search.rb
160
190
  - lib/bookshark/storage/file_manager.rb