bookshark 1.0.1 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d35fee946c6b6dcf4ca740d89ba3a9cb89f36a94
4
- data.tar.gz: ff928cdadd16b132adc9f193ff5c7f565a0b0398
3
+ metadata.gz: '03852f73c9246676ff20b75b0a893998e967f2c7'
4
+ data.tar.gz: 7fe938710c2e9344563395e5db7cf3e7eaf76b25
5
5
  SHA512:
6
- metadata.gz: 2dac9ad4842172d896a488fa60baaf13d862c8d2e3b1e68b3a9f5e6ee28bec5136d4ed7d084c4bf6fa0f5f1cfd3224241958467c7df99929ea8cfc1c5ad92abc
7
- data.tar.gz: e8f9dcb4f20e0a2330a91588c6dfbb306a74820ee9ed7fa7564097013c16244de46ffc3a636a751126f19ce9c1a32b209fb254d7533d901d6f77a00a6b8b5100
6
+ metadata.gz: 3ccd4cfd0e82aa6918304e82df18f382f2b7e33470c9f52ff362b4897aaf53b703a04bd2689871925293d7e084d3e0008c74aa289fb36a106be7dca20c01ee0f
7
+ data.tar.gz: fb4590ee8c1a24402f48b3502ddb77e24fbbde2c988a18fb62fe867762950f351d3271512773602bd9cec47d7e51a6cb94de1e2d695cf7c0d8704a39bc72f905
@@ -24,9 +24,11 @@ Gem::Specification.new do |spec|
24
24
  spec.add_dependency "sanitize", "~> 4.0"
25
25
  spec.add_dependency "json", "~> 1.8"
26
26
  spec.add_dependency "htmlentities", "~> 4.3"
27
+ spec.add_dependency "marc", "~> 1.0"
27
28
 
28
29
  spec.add_development_dependency "bundler", ">= 1.6"
29
30
  spec.add_development_dependency "rake", "~> 10.0"
30
31
  spec.add_development_dependency 'rspec', "~> 3.2"
31
32
  spec.add_development_dependency "webmock", "~> 1.2"
33
+ spec.add_development_dependency "pry-byebug", "~> 3.4"
32
34
  end
@@ -7,6 +7,7 @@ require 'bookshark/extractors/book_extractor'
7
7
  require 'bookshark/extractors/bibliographical_book_extractor'
8
8
  require 'bookshark/extractors/publisher_extractor'
9
9
  require 'bookshark/extractors/search'
10
+ require 'bookshark/extractors/nlg/book_extractor'
10
11
 
11
12
  require 'bookshark/crawlers/base'
12
13
  require 'bookshark/crawlers/publisher_crawler'
@@ -22,8 +23,8 @@ module Bookshark
22
23
  def self.root
23
24
  # File.dirname __dir__ # Works only on Ruby >= 2.0.0
24
25
  File.expand_path(File.join(File.dirname(__FILE__), '../'))
25
- end
26
-
26
+ end
27
+
27
28
  def self.path_to_storage
28
29
  File.join root, 'lib/bookshark/storage'
29
30
  end
@@ -31,7 +32,7 @@ module Bookshark
31
32
 
32
33
  class Extractor
33
34
  include FileManager
34
- attr_accessor :site, :format
35
+ attr_accessor :site, :format
35
36
 
36
37
  def initialize(options = {})
37
38
  options = DEFAULTS.merge(options)
@@ -44,9 +45,9 @@ module Bookshark
44
45
  options[:format] ||= @format
45
46
 
46
47
  author_extractor = Biblionet::Extractors::AuthorExtractor.new
47
- author = author_extractor.load_and_extract_author(uri)
48
-
49
- response = {}
48
+ author = author_extractor.load_and_extract_author(uri)
49
+
50
+ response = {}
50
51
  response[:author] = !author.nil? ? [author] : []
51
52
  response = change_format(response, options[:format])
52
53
  return response
@@ -58,90 +59,109 @@ module Bookshark
58
59
 
59
60
  publisher_extractor = Biblionet::Extractors::PublisherExtractor.new
60
61
  publisher = publisher_extractor.load_and_extract_publisher(uri)
61
-
62
- response = {}
62
+
63
+ response = {}
63
64
  response[:publisher] = !publisher.nil? ? [publisher] : []
64
65
  response = change_format(response, options[:format])
65
66
  response = publisher_extractor.decode_text(response)
66
67
 
67
68
  return response
68
- # return uri
69
- end
69
+ # return uri
70
+ end
70
71
 
71
72
  def book(options = {})
72
- book_extractor = Biblionet::Extractors::BookExtractor.new
73
-
74
- if book_extractor.present?(options[:isbn])
75
- search_engine = Biblionet::Extractors::Search.new
76
- options[:id] = search_engine.search_by_isbn(options[:isbn])
77
- end
73
+ options[:site] ||= @site
78
74
 
79
- uri = process_options(options, __method__)
80
- options[:format] ||= @format
81
- options[:eager] ||= false
82
- options[:nilify] ||= false
83
-
84
- if options[:eager]
85
- book = eager_extract_book(uri)
86
- else
87
- book = book_extractor.load_and_extract_book(uri)
88
- end
75
+ if options[:site] == 'biblionet'
76
+ book_extractor = Biblionet::Extractors::BookExtractor.new
89
77
 
90
- response = {}
91
- response[:book] = !book.nil? ? [book] : []
78
+ if book_extractor.present?(options[:isbn])
79
+ search_engine = Biblionet::Extractors::Search.new
80
+ options[:id] = search_engine.search_by_isbn(options[:isbn])
81
+ end
92
82
 
93
- return nil if response[:book].empty? and options[:nilify]
94
-
95
- response = change_format(response, options[:format])
96
-
97
- response = book_extractor.decode_text(response) if response.class == "String"
98
-
99
- return response
83
+ uri = process_options(options, __method__)
84
+ options[:format] ||= @format
85
+ options[:eager] ||= false
86
+ options[:nilify] ||= false
87
+
88
+ if options[:eager]
89
+ book = eager_extract_book(uri)
90
+ else
91
+ book = book_extractor.load_and_extract_book(uri)
92
+ end
93
+
94
+ response = {}
95
+ response[:book] = !book.nil? ? [book] : []
96
+
97
+ return nil if response[:book].empty? and options[:nilify]
98
+
99
+ response = change_format(response, options[:format])
100
+
101
+ response = book_extractor.decode_text(response) if response.class == "String"
102
+
103
+ return response
104
+ elsif options[:site] == 'nlg'
105
+ book_extractor = Nlg::Extractors::BookExtractor.new
106
+
107
+ options[:format] ||= @format
108
+
109
+ # if !options[:uri].nil?
110
+ # uri = "#{options[:uri]}/Export?style=MARCXML"
111
+ # elsif !options[:id].nil?
112
+ # uri = "http://nbib.nlg.gr/Record/#{options[:id]}/Export?style=MARCXML"
113
+ # end
114
+
115
+ book = book_extractor.load_and_extract_book(options[:id])
116
+
117
+ response = {}
118
+ response[:book] = !book.nil? ? [book] : []
119
+ end
100
120
  end
101
121
 
102
-
122
+
103
123
  # def bibliographical_book(options = {})
104
124
  # bibliographical_book_extractor = Biblionet::Extractors::BibliographicalBookExtractor.new
105
-
125
+
106
126
  # uri = "http://www.biblionet.gr/main.asp?page=results&Titlesid=#{options[:id]}"
107
127
  # options[:format] ||= @format
108
-
128
+
109
129
  # book = bibliographical_book_extractor.load_and_extract_book(uri)
110
-
111
- # response = {}
130
+
131
+ # response = {}
112
132
  # response[:book] = !book.nil? ? [book] : []
113
133
  # response = change_format(response, options[:format])
114
- # response = bibliographical_book_extractor.decode_text(response)
115
- # end
116
-
134
+ # response = bibliographical_book_extractor.decode_text(response)
135
+ # end
136
+
117
137
  # puts Bookshark::Extractor.new(format: 'pretty_json').bibliographical_book(id: 103788)
118
138
 
119
139
  def category(options = {})
120
140
  uri = process_options(options, __method__)
121
- options[:format] ||= @format
141
+ options[:format] ||= @format
122
142
 
123
143
  category_extractor = Biblionet::Extractors::CategoryExtractor.new
124
144
  category = category_extractor.extract_categories_from(uri)
125
145
 
126
- response = {}
146
+ response = {}
127
147
  response[:category] = !category.nil? ? [category] : []
128
148
  response = change_format(response, options[:format])
129
-
130
- return response
149
+
150
+ return response
131
151
  end
132
152
 
133
153
  def search(options = {})
134
154
  options[:format] ||= @format
135
- options[:results_type] ||= 'metadata'
155
+ options[:results_type] ||= 'metadata'
136
156
 
137
157
  search_engine = Biblionet::Extractors::Search.new
138
158
  search_results = search_engine.perform_search(options)
139
159
 
140
- response = {}
160
+ response = {}
141
161
  response[:book] = search_results
142
162
  response = change_format(response, options[:format])
143
-
144
- return response
163
+
164
+ return response
145
165
  end
146
166
 
147
167
  # def books_from_storage
@@ -165,22 +185,22 @@ module Bookshark
165
185
  record = book(id: book_id, local: true, format: format, nilify: true)
166
186
 
167
187
  dir_to_save = Bookshark.path_to_storage + '/' + 'json_book_records/' + "#{((book_id-1)/1000)}/" + "book_#{book_id}.json"
168
-
188
+
169
189
  save_to(dir_to_save, record) unless record.nil?
170
190
  end
171
191
  end
172
192
 
173
193
 
174
- def extract_from_storage_and_save(metadata_type, source_dir, target_dir)
194
+ def extract_from_storage_and_save(metadata_type, source_dir, target_dir)
175
195
  list_directories(path: Bookshark.path_to_storage + '/' + source_dir).each do |dir|
176
- dir_to_save = dir.gsub(source_dir, target_dir)
196
+ dir_to_save = dir.gsub(source_dir, target_dir)
177
197
 
178
198
  list_files(path: dir, extension: 'html', all:true).each do |file|
179
- puts "Extracting from file: " + file.to_s
199
+ puts "Extracting from file: " + file.to_s
180
200
 
181
201
  # Extract publisher metadata from local file.
182
- options = {uri: file, format: 'pretty_json', local: true}
183
-
202
+ options = {uri: file, format: 'pretty_json', local: true}
203
+
184
204
  case metadata_type
185
205
  when 'author'
186
206
  record = author(options)
@@ -189,16 +209,16 @@ module Bookshark
189
209
  # when 'book'
190
210
  # record = book(options)
191
211
  when 'category'
192
- record = category(options)
193
- end
212
+ record = category(options)
213
+ end
194
214
 
195
215
  # Prepare a path to save the new file.
196
216
  filename = File.basename(file,".*")
197
217
  path_to_save = "#{dir_to_save}#{filename}.json"
198
-
199
- # Save to file.
218
+
219
+ # Save to file.
200
220
  save_to("#{path_to_save}", record)
201
-
221
+
202
222
  end # unless File.directory?(dir_to_save) # if dir.end_with? '/195/'
203
223
  end
204
224
  end
@@ -209,9 +229,9 @@ module Bookshark
209
229
  # end
210
230
  category_extractor = Biblionet::Extractors::CategoryExtractor.new
211
231
  all_categories = Hash.new
212
-
232
+
213
233
  list_files(path: 'storage/raw_ddc_pages', extension: 'html', all:true).each do |file|
214
- categories = category_extractor.extract_categories_from(file)
234
+ categories = category_extractor.extract_categories_from(file)
215
235
  all_categories.merge!(categories) unless categories.nil? or categories.empty?
216
236
  end
217
237
 
@@ -228,19 +248,19 @@ module Bookshark
228
248
 
229
249
  list_directories(path: 'storage/raw_html_pages').each do |dir|
230
250
  dir_to_save = dir.gsub(/raw_html_pages/, 'books')
231
-
232
- list_files(path: dir, extension: 'html', all:true).each do |file|
233
-
251
+
252
+ list_files(path: dir, extension: 'html', all:true).each do |file|
253
+
234
254
  # Load the book from html file and parse the data.
235
255
  # pp "Parsing book: #{file}"
236
256
  pp file
237
257
  book = bp.load_and_extract_book(file)
238
-
258
+
239
259
  # Prepare a path to save the new file.
240
260
  filename = File.basename(file,".*")
241
261
  path_to_save = "#{dir_to_save}#{filename}.json"
242
-
243
- # Save to file.
262
+
263
+ # Save to file.
244
264
  bp.save_to("#{path_to_save}", JSON.pretty_generate(book))
245
265
  # pp "Book #{file} saved!"
246
266
  end unless File.directory?(dir_to_save) # if dir.end_with? '/195/'
@@ -266,11 +286,11 @@ module Bookshark
266
286
  url_method = 'book'
267
287
  local_path = "html_book_pages/#{((id-1)/1000)}/book_#{id}.html"
268
288
  when 'category'
269
- url_method = 'index'
270
- local_path = "html_ddc_pages/#{((id-1)/1000)}/ddc_#{id}.html"
289
+ url_method = 'index'
290
+ local_path = "html_ddc_pages/#{((id-1)/1000)}/ddc_#{id}.html"
271
291
  else
272
292
  puts "Called from unknown method. Probably its rspec."
273
- end
293
+ end
274
294
 
275
295
  options[:local] ||= false
276
296
  url = "#{Bookshark::path_to_storage}/#{local_path}" if options[:local]
@@ -279,7 +299,7 @@ module Bookshark
279
299
  uri = options[:uri] ||= url
280
300
 
281
301
  return uri
282
- end
302
+ end
283
303
 
284
304
  def change_format(hash, format)
285
305
  case format
@@ -288,10 +308,10 @@ module Bookshark
288
308
  when 'json'
289
309
  hash = hash.to_json
290
310
  when 'pretty_json'
291
- hash = JSON.pretty_generate(hash)
311
+ hash = JSON.pretty_generate(hash)
292
312
  end
293
313
  return hash
294
- end
314
+ end
295
315
 
296
316
  def eager_extract_book(uri)
297
317
  book_extractor = Biblionet::Extractors::BookExtractor.new
@@ -301,13 +321,13 @@ module Bookshark
301
321
 
302
322
  book = book_extractor.load_and_extract_book(uri)
303
323
 
304
- tmp_data = []
324
+ tmp_data = []
305
325
  book[:author].each do |author|
306
- tmp_data << author_extractor.load_and_extract_author("http://www.biblionet.gr/author/#{author[:b_id]}")
326
+ tmp_data << author_extractor.load_and_extract_author("http://www.biblionet.gr/author/#{author[:b_id]}")
307
327
  end
308
- book[:author] = tmp_data
309
-
310
- tmp_data, tmp_hash = [], {}
328
+ book[:author] = tmp_data
329
+
330
+ tmp_data, tmp_hash = [], {}
311
331
  book[:contributors].each do |job, contributors|
312
332
  contributors.each do |contributor|
313
333
  tmp_data << author_extractor.load_and_extract_author("http://www.biblionet.gr/author/#{contributor[:b_id]}")
@@ -317,19 +337,19 @@ module Bookshark
317
337
  end
318
338
  book[:contributors] = tmp_hash
319
339
 
320
- tmp_data, tmp_hash = [], {}
340
+ tmp_data, tmp_hash = [], {}
321
341
  book[:category].each do |category|
322
342
  tmp_data << category_extractor.extract_categories_from("http://www.biblionet.gr/index/#{category[:b_id]}")
323
343
  end
324
- book[:category] = tmp_data
325
-
326
- tmp_data = []
327
- tmp_data << publisher_extractor.load_and_extract_publisher("http://www.biblionet.gr/com/#{book[:publisher][:b_id]}")
344
+ book[:category] = tmp_data
345
+
346
+ tmp_data = []
347
+ tmp_data << publisher_extractor.load_and_extract_publisher("http://www.biblionet.gr/com/#{book[:publisher][:b_id]}")
328
348
  book[:publisher] = tmp_data
329
349
 
330
350
  book
331
- end
332
-
351
+ end
352
+
333
353
  end
334
354
 
335
355
 
@@ -339,7 +359,7 @@ module Bookshark
339
359
 
340
360
  def initialize(options = {})
341
361
  options = DEFAULTS.merge(options)
342
- @site = options[:site]
362
+ @site = options[:site]
343
363
  end
344
364
 
345
365
  def publishers
@@ -362,11 +382,11 @@ module Bookshark
362
382
  crawler.crawl_and_save
363
383
  end
364
384
 
365
- end
385
+ end
366
386
 
367
387
  # module Biblionet
368
388
  # class Extract
369
- # class << self
389
+ # class << self
370
390
  # def author(uri=nil)
371
391
  # author_extractor = BiblionetParser::Core::AuthorExtractor.new
372
392
  # author_extractor.load_and_extract_author(uri)
@@ -384,7 +404,7 @@ module Bookshark
384
404
 
385
405
  # end
386
406
  # end
387
- # end
407
+ # end
388
408
  end
389
409
 
390
410
 
@@ -467,4 +487,4 @@ end
467
487
  # Problematic at biblionet
468
488
  # http://biblionet.gr/book/196388
469
489
  # http://biblionet.gr/book/196386
470
- # http://biblionet.gr/book/195525
490
+ # http://biblionet.gr/book/195525
@@ -170,10 +170,17 @@ module Biblionet
170
170
  text: publisher_node.text,
171
171
  b_id: (publisher_node[:href].split("/"))[2]
172
172
  }
173
- after_last_author_text = @nodeset
173
+ last_author = @nodeset
174
174
  .xpath("//a[@class='booklink' and @href[contains(.,'/author/') ]][last()]").last
175
- .next_sibling.text.strip
176
- puts after_last_author_text
175
+
176
+ if !last_author.nil? && !last_author.empty?
177
+ after_last_author_text = last_author.next_sibling.text.strip
178
+ else
179
+ last_book = @nodeset
180
+ .xpath("//a[@class='booklink' and @href[contains(.,'/book/') ]][last()]").last
181
+ after_last_author_text = last_book.next_sibling.text.strip
182
+ end
183
+
177
184
  details_hash[:publication] = {
178
185
  year: after_last_author_text[/(?<=: )\d+(?=\.)/],
179
186
  version: after_last_author_text[/(?<=- )\d+(?=η)/],
@@ -0,0 +1,110 @@
1
+ #!/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require 'rubygems'
5
+ require 'json'
6
+ require 'logger'
7
+ require 'pp'
8
+ require 'marc'
9
+ require 'htmlentities'
10
+
11
+ module Nlg
12
+ module Extractors
13
+
14
+ class Base
15
+
16
+ attr_reader :url, :nlg_id, :page
17
+
18
+ def initialize(id=nil)
19
+ load_page(id)
20
+ end
21
+
22
+ def load_page(id=nil)
23
+ load_page_by_id(id) unless id.nil?
24
+ end
25
+
26
+ def load_page_by_id(id)
27
+ begin
28
+ @nlg_id = id unless id.nil? # id is expected to be the last number.
29
+ @url = "http://nbib.nlg.gr/Record/#{@nlg_id}/Export?style=MARCXML"
30
+
31
+ pp "Downloading page: #{@url}"
32
+
33
+ Net::HTTP.start("nbib.nlg.gr") do |http|
34
+ response = http.get("/Record/#{@nlg_id}/Export?style=MARCXML")
35
+ pp response.content_type
36
+ pp response.code
37
+ raise EmptyPageError.new(@url) unless response.content_type == "text/xml" && response.code == "200"
38
+
39
+ @page = response.body
40
+ end
41
+
42
+ rescue Errno::ENOENT => e
43
+ pp "Page: #{@url} NOT FOUND."
44
+ pp e
45
+ rescue EmptyPageError => e
46
+ pp "Page: #{@url} is EMPTY."
47
+ pp e
48
+ @page = nil
49
+ rescue OpenURI::HTTPError => e
50
+ pp e
51
+ pp e.io.status
52
+ rescue StandardError => e
53
+ pp "Generic error #{e.class}. Will wait for 2 minutes and then try again."
54
+ pp e
55
+ sleep(120)
56
+ retry
57
+ end
58
+ end
59
+
60
+ # Decodes text with escaped html entities and returns the decoded text.
61
+ #
62
+ # ==== Params:
63
+ #
64
+ # +encoded_text+:: the text which contains encoded entities
65
+ #
66
+ def decode_text(encoded_text)
67
+ self.class.decode_text(encoded_text)
68
+ end
69
+
70
+ def self.decode_text(encoded_text)
71
+ # encoded_text = File.read(encoded_file_path)
72
+ coder = HTMLEntities.new
73
+ coder.decode(encoded_text)
74
+ end
75
+
76
+ def present?(value)
77
+ return (not value.nil? and not value.empty?) ? true : false
78
+ end
79
+
80
+ end
81
+
82
+ # Raised when a page is considered empty.
83
+ #
84
+ class EmptyPageError < StandardError
85
+ attr_reader :url
86
+
87
+ def initialize(url)
88
+ @url = url
89
+
90
+ msg = "Page: #{url} is not valid xml so it is considered EMPTY."
91
+ super(msg)
92
+ end
93
+ end
94
+
95
+ # Raised when something unexpected or in wrong format is parsed.
96
+ #
97
+ class NoIdeaWhatThisIsError < StandardError
98
+ attr_reader :nlg_id, :the_unexpected
99
+
100
+ def initialize(nlg_id, the_unexpected)
101
+ @nlg_id = nlg_id
102
+ @the_unexpected = the_unexpected
103
+
104
+ msg = "We have no idea what this: #{the_unexpected} is. At book #{nlg_id}"
105
+ super(msg)
106
+ end
107
+ end
108
+
109
+ end
110
+ end
@@ -0,0 +1,28 @@
1
+ #!/bin/env ruby
2
+ # encoding: utf-8
3
+
4
+ require_relative 'base'
5
+
6
+ module Nlg
7
+ module Extractors
8
+
9
+ class BookExtractor < Base
10
+ attr_reader :book
11
+
12
+ def initialize(id=nil)
13
+ super(id)
14
+ extract_book unless id.nil? or @page.nil?
15
+ end
16
+
17
+ def load_and_extract_book(id=nil)
18
+ load_page(id)
19
+ extract_book unless id.nil? or @page.nil?
20
+ end
21
+
22
+ def extract_book(nlg_id=@nlg_id, book_page=@page)
23
+ puts "should extract book #{nlg_id} from nlg"
24
+ end
25
+
26
+ end
27
+ end
28
+ end
@@ -1,3 +1,3 @@
1
1
  module Bookshark
2
- VERSION = "1.0.1"
2
+ VERSION = "1.0.3"
3
3
  end
@@ -42,10 +42,10 @@
42
42
  "name": "Πανεπιστημιακές Εκδόσεις Κρήτης",
43
43
  "owner": "Στέφανος Τραχανάς",
44
44
  "bookstores": {
45
- "Υποκατάστημα": {
45
+ "&Upsilon;&pi;&omicron;&kappa;&alpha;&tau;&#940;&sigma;&tau;&eta;&mu;&alpha;": {
46
46
  "address": [
47
- "Κλεισόβης 3",
48
- "106 77 Αθήνα"
47
+ "&Kappa;&lambda;&epsilon;&iota;&sigma;&#972;&beta;&eta;&sigmaf; 3",
48
+ "106 77 &Alpha;&theta;&#942;&nu;&alpha;"
49
49
  ],
50
50
  "telephone": [
51
51
  "210 38490203"
@@ -372,7 +372,7 @@
372
372
  "format": "Βιβλίο",
373
373
  "original_language": null,
374
374
  "original_title": null,
375
- "price": "6,85",
375
+ "price": "6,82",
376
376
  "availability": "Κυκλοφορεί",
377
377
  "last_update": null,
378
378
  "series": {
@@ -421,7 +421,7 @@
421
421
  "format": "Βιβλίο",
422
422
  "original_language": null,
423
423
  "original_title": null,
424
- "price": "3,73",
424
+ "price": "3,71",
425
425
  "availability": "Κυκλοφορεί",
426
426
  "last_update": null,
427
427
  "series": {
@@ -445,7 +445,7 @@
445
445
  "contributors": {
446
446
  },
447
447
  "publisher": {
448
- "text": "Δωδώνη Εκδοτική ΕΠΕ",
448
+ "text": "Δωδώνη",
449
449
  "b_id": "1"
450
450
  },
451
451
  "isbn": "960-248-541-8",
@@ -470,7 +470,7 @@
470
470
  "format": "Βιβλίο",
471
471
  "original_language": null,
472
472
  "original_title": null,
473
- "price": "10,60",
473
+ "price": "10,55",
474
474
  "availability": "Κυκλοφορεί",
475
475
  "last_update": null,
476
476
  "series": {
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bookshark
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitris Klisiaris
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-23 00:00:00.000000000 Z
11
+ date: 2017-02-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -72,6 +72,20 @@ dependencies:
72
72
  - - "~>"
73
73
  - !ruby/object:Gem::Version
74
74
  version: '4.3'
75
+ - !ruby/object:Gem::Dependency
76
+ name: marc
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '1.0'
82
+ type: :runtime
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '1.0'
75
89
  - !ruby/object:Gem::Dependency
76
90
  name: bundler
77
91
  requirement: !ruby/object:Gem::Requirement
@@ -128,6 +142,20 @@ dependencies:
128
142
  - - "~>"
129
143
  - !ruby/object:Gem::Version
130
144
  version: '1.2'
145
+ - !ruby/object:Gem::Dependency
146
+ name: pry-byebug
147
+ requirement: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - "~>"
150
+ - !ruby/object:Gem::Version
151
+ version: '3.4'
152
+ type: :development
153
+ prerelease: false
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - "~>"
157
+ - !ruby/object:Gem::Version
158
+ version: '3.4'
131
159
  description: Extracts book, author, publisher and category metadata from biblionet.gr.
132
160
  email:
133
161
  - dklisiaris@gmail.com
@@ -155,6 +183,8 @@ files:
155
183
  - lib/bookshark/extractors/bibliographical_book_extractor.rb
156
184
  - lib/bookshark/extractors/book_extractor.rb
157
185
  - lib/bookshark/extractors/category_extractor.rb
186
+ - lib/bookshark/extractors/nlg/base.rb
187
+ - lib/bookshark/extractors/nlg/book_extractor.rb
158
188
  - lib/bookshark/extractors/publisher_extractor.rb
159
189
  - lib/bookshark/extractors/search.rb
160
190
  - lib/bookshark/storage/file_manager.rb