alexandria-book-collection-manager 0.7.2 → 0.7.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.hound.yml +2 -0
- data/.rubocop.yml +4 -4
- data/.rubocop_todo.yml +55 -82
- data/CHANGELOG.md +17 -0
- data/INSTALL.md +3 -3
- data/README.md +1 -2
- data/Rakefile +14 -33
- data/TODO.md +9 -1
- data/alexandria-book-collection-manager.gemspec +4 -3
- data/bin/alexandria +21 -34
- data/doc/FAQ +2 -5
- data/lib/alexandria.rb +4 -16
- data/lib/alexandria/book_providers.rb +5 -7
- data/lib/alexandria/book_providers/adlibris.rb +5 -3
- data/lib/alexandria/book_providers/amazon_aws.rb +3 -1
- data/lib/alexandria/book_providers/amazon_ecs_util.rb +8 -0
- data/lib/alexandria/book_providers/barnes_and_noble.rb +7 -4
- data/lib/alexandria/book_providers/douban.rb +1 -1
- data/lib/alexandria/book_providers/proxis.rb +11 -27
- data/lib/alexandria/book_providers/renaud.rb +9 -3
- data/lib/alexandria/book_providers/siciliano.rb +7 -4
- data/lib/alexandria/book_providers/thalia.rb +5 -3
- data/lib/alexandria/book_providers/web.rb +11 -29
- data/lib/alexandria/book_providers/worldcat.rb +7 -5
- data/lib/alexandria/book_providers/z3950.rb +10 -7
- data/lib/alexandria/console.rb +5 -18
- data/lib/alexandria/execution_queue.rb +2 -1
- data/lib/alexandria/export_format.rb +47 -0
- data/lib/alexandria/export_library.rb +72 -180
- data/lib/alexandria/import_library.rb +14 -23
- data/lib/alexandria/import_library_csv.rb +3 -6
- data/lib/alexandria/library_collection.rb +78 -0
- data/lib/alexandria/library_sort_order.rb +43 -0
- data/lib/alexandria/library_store.rb +222 -0
- data/lib/alexandria/logging.rb +2 -0
- data/lib/alexandria/models/book.rb +8 -16
- data/lib/alexandria/models/library.rb +26 -308
- data/lib/alexandria/preferences.rb +7 -24
- data/lib/alexandria/scanners/cuecat.rb +3 -1
- data/lib/alexandria/smart_library.rb +32 -67
- data/lib/alexandria/ui/builder_base.rb +6 -26
- data/lib/alexandria/ui/callbacks.rb +8 -34
- data/lib/alexandria/ui/completion_models.rb +2 -1
- data/lib/alexandria/ui/dialogs/about_dialog.rb +35 -47
- data/lib/alexandria/ui/dialogs/acquire_dialog.rb +14 -30
- data/lib/alexandria/ui/dialogs/alert_dialog.rb +8 -17
- data/lib/alexandria/ui/dialogs/bad_isbns_dialog.rb +10 -24
- data/lib/alexandria/ui/dialogs/book_properties_dialog.rb +17 -18
- data/lib/alexandria/ui/dialogs/book_properties_dialog_base.rb +25 -41
- data/lib/alexandria/ui/dialogs/export_dialog.rb +48 -56
- data/lib/alexandria/ui/dialogs/import_dialog.rb +31 -51
- data/lib/alexandria/ui/dialogs/keep_bad_isbn_dialog.rb +33 -0
- data/lib/alexandria/ui/dialogs/misc_dialogs.rb +12 -25
- data/lib/alexandria/ui/dialogs/new_book_dialog.rb +57 -94
- data/lib/alexandria/ui/dialogs/new_book_dialog_manual.rb +24 -42
- data/lib/alexandria/ui/dialogs/new_smart_library_dialog.rb +9 -21
- data/lib/alexandria/ui/dialogs/preferences_dialog.rb +27 -32
- data/lib/alexandria/ui/dialogs/smart_library_properties_dialog.rb +5 -3
- data/lib/alexandria/ui/dialogs/smart_library_properties_dialog_base.rb +15 -12
- data/lib/alexandria/ui/icons.rb +11 -22
- data/lib/alexandria/ui/init.rb +3 -3
- data/lib/alexandria/ui/libraries_combo.rb +1 -0
- data/lib/alexandria/ui/listview.rb +5 -21
- data/lib/alexandria/ui/multi_drag_treeview.rb +20 -32
- data/lib/alexandria/ui/sidepane.rb +9 -24
- data/lib/alexandria/ui/ui_manager.rb +36 -60
- data/lib/alexandria/undo_manager.rb +1 -0
- data/lib/alexandria/version.rb +2 -2
- data/lib/alexandria/web_themes.rb +1 -0
- data/po/cs.po +0 -4
- data/po/cy.po +0 -4
- data/po/de.po +0 -4
- data/po/el.po +0 -4
- data/po/es.po +0 -4
- data/po/fr.po +0 -4
- data/po/ga.po +0 -4
- data/po/gl.po +0 -4
- data/po/it.po +0 -4
- data/po/ja.po +0 -4
- data/po/mk.po +0 -4
- data/po/nb.po +0 -4
- data/po/nl.po +0 -4
- data/po/pl.po +0 -4
- data/po/pt.po +0 -4
- data/po/pt_BR.po +0 -4
- data/po/ru.po +0 -4
- data/po/sk.po +0 -4
- data/po/sv.po +0 -4
- data/po/uk.po +0 -4
- data/po/zh_TW.po +0 -4
- data/schemas/alexandria.schemas +1 -1
- data/share/alexandria/glade/acquire_dialog__builder.glade +14 -11
- data/share/alexandria/glade/book_properties_dialog__builder.glade +170 -298
- data/share/alexandria/glade/main_app__builder.glade +22 -16
- data/share/alexandria/glade/new_book_dialog__builder.glade +26 -58
- data/share/alexandria/glade/preferences_dialog__builder.glade +249 -289
- data/share/gnome/help/alexandria/C/introduction.xml +0 -4
- data/share/gnome/help/alexandria/ja/introduction.xml +0 -4
- data/spec/alexandria/book_providers_spec.rb +1 -20
- data/spec/alexandria/console_spec.rb +32 -0
- data/spec/alexandria/export_library_spec.rb +141 -0
- data/spec/alexandria/library_spec.rb +24 -80
- data/spec/alexandria/library_store_spec.rb +37 -0
- data/spec/alexandria/smart_library_spec.rb +27 -22
- data/spec/alexandria/ui/dialogs/new_book_dialog_spec.rb +22 -0
- data/spec/alexandria/ui/dialogs_spec.rb +104 -38
- data/spec/end_to_end/basic_run_spec.rb +69 -0
- data/spec/spec_helper.rb +13 -25
- data/tasks/spec.rake +15 -2
- data/util/rake/fileinstall.rb +1 -0
- metadata +38 -16
- data/lib/alexandria/book_providers/deastore.rb +0 -265
- data/lib/alexandria/book_providers/mcu.rb +0 -182
@@ -1,265 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# -*- ruby -*-
|
4
|
-
#
|
5
|
-
# Copyright (C) 2009 Cathal Mc Ginley
|
6
|
-
# Copyright (C) 2011, 2014, 2016 Matijs van Zuijlen
|
7
|
-
#
|
8
|
-
# Alexandria is free software; you can redistribute it and/or
|
9
|
-
# modify it under the terms of the GNU General Public License as
|
10
|
-
# published by the Free Software Foundation; either version 2 of the
|
11
|
-
# License, or (at your option) any later version.
|
12
|
-
#
|
13
|
-
# Alexandria is distributed in the hope that it will be useful,
|
14
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
16
|
-
# General Public License for more details.
|
17
|
-
#
|
18
|
-
# You should have received a copy of the GNU General Public
|
19
|
-
# License along with Alexandria; see the file COPYING. If not,
|
20
|
-
# write to the Free Software Foundation, Inc., 51 Franklin Street,
|
21
|
-
# Fifth Floor, Boston, MA 02110-1301 USA.
|
22
|
-
|
23
|
-
# New DeaStore provider, taken from the Palatina MetaDataSource and
|
24
|
-
# modified to fit the structure of Alexandria book providers.
|
25
|
-
# (24 Feb 2009)
|
26
|
-
|
27
|
-
require 'cgi'
|
28
|
-
require 'alexandria/net'
|
29
|
-
require 'alexandria/book_providers/web'
|
30
|
-
|
31
|
-
module Alexandria
|
32
|
-
class BookProviders
|
33
|
-
class DeaStoreProvider < WebsiteBasedProvider
|
34
|
-
include Alexandria::Logging
|
35
|
-
|
36
|
-
SITE = 'http://www.deastore.com'
|
37
|
-
BASE_SEARCH_URL = "#{SITE}/search/italian_books/0/%s/%s" # type/term
|
38
|
-
|
39
|
-
def initialize
|
40
|
-
super('DeaStore', 'DeaStore (Italy)')
|
41
|
-
prefs.read
|
42
|
-
@agent = nil
|
43
|
-
end
|
44
|
-
|
45
|
-
def agent
|
46
|
-
unless @agent
|
47
|
-
@agent = Alexandria::WWWAgent.new
|
48
|
-
@agent.language = :it
|
49
|
-
end
|
50
|
-
@agent
|
51
|
-
end
|
52
|
-
|
53
|
-
def get_book_from_search_result(result)
|
54
|
-
log.debug { "Fetching book from #{result[:url]}" }
|
55
|
-
html_data = agent.get(result[:url])
|
56
|
-
# File.open("rsltflarn#{Time.now().usec()}.html", 'wb') do |f|
|
57
|
-
# f.write(html_data.body)
|
58
|
-
# end
|
59
|
-
parse_result_data(html_data.body)
|
60
|
-
end
|
61
|
-
|
62
|
-
def search(criterion, type)
|
63
|
-
begin
|
64
|
-
criterion = criterion.encode('ISO-8859-1') # still needed??
|
65
|
-
rescue Encoding::UndefinedConversionError
|
66
|
-
log.info { "Cannot search for non-ISO-8859-1 terms at DeaStore : #{criterion}" }
|
67
|
-
raise NoResultsError
|
68
|
-
end
|
69
|
-
html_data = agent.get(create_search_uri(type, criterion))
|
70
|
-
# File.open("flarn#{Time.now().usec()}.html", 'wb') do |f|
|
71
|
-
# f.write(html_data.body)
|
72
|
-
# end
|
73
|
-
results = parse_search_result_data(html_data.body)
|
74
|
-
raise NoResultsError if results.empty?
|
75
|
-
|
76
|
-
if type == SEARCH_BY_ISBN
|
77
|
-
get_book_from_search_result(results.first)
|
78
|
-
else
|
79
|
-
results.map { |result| get_book_from_search_result(result) }
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
# it isn't possible to create a URL for a book given only the ISBN...
|
84
|
-
def url(_book)
|
85
|
-
nil
|
86
|
-
end
|
87
|
-
|
88
|
-
private
|
89
|
-
|
90
|
-
def create_search_uri(search_type, search_term)
|
91
|
-
# bah! very, very similar to the siciliano code! refactor out this duplication
|
92
|
-
(search_type_code = { SEARCH_BY_ISBN => 'isbn',
|
93
|
-
SEARCH_BY_TITLE => 'title',
|
94
|
-
SEARCH_BY_AUTHORS => 'author',
|
95
|
-
SEARCH_BY_KEYWORD => 'keywords' }[search_type]) || 'keywords'
|
96
|
-
|
97
|
-
search_term_encoded = if search_type == SEARCH_BY_ISBN
|
98
|
-
Library.canonicalise_isbn(search_term) # isbn-10
|
99
|
-
else
|
100
|
-
CGI.escape(search_term)
|
101
|
-
end
|
102
|
-
|
103
|
-
uri = format(BASE_SEARCH_URL, search_type_code, search_term_encoded)
|
104
|
-
log.debug { uri }
|
105
|
-
uri
|
106
|
-
end
|
107
|
-
|
108
|
-
def parse_search_result_data(html)
|
109
|
-
doc = html_to_doc(html)
|
110
|
-
book_search_results = []
|
111
|
-
|
112
|
-
result_divs = doc.search('div.scheda_prodotto')
|
113
|
-
result_divs.each do |div|
|
114
|
-
begin
|
115
|
-
# The layout...
|
116
|
-
# a > img
|
117
|
-
# div.scheda_content
|
118
|
-
# a[link->productpage] title ## a.titolo_link
|
119
|
-
# p (genre I think) ## !ignore
|
120
|
-
# a[link->author] author ## a.info
|
121
|
-
# p.editore (publisher? editor?)
|
122
|
-
# p Data di pubblicazione: \n 2009
|
123
|
-
# p.prezzo (price)
|
124
|
-
|
125
|
-
# cover_url = ''
|
126
|
-
# cover_images = div/'a/img'
|
127
|
-
# unless cover_images.empty?
|
128
|
-
# img = cover_images.first
|
129
|
-
# image_url = img['src']
|
130
|
-
# if image_url =~ /^http/
|
131
|
-
# cover_url = '' # image_url
|
132
|
-
# elsif image_url[0..0] != '/'
|
133
|
-
# cover_url = "#{SITE}/#{image_url}"
|
134
|
-
# else
|
135
|
-
# cover_url = "#{SITE}#{image_url}"
|
136
|
-
# end
|
137
|
-
# log.debug { "Search Cover Image URL #{cover_url}" }
|
138
|
-
|
139
|
-
# end
|
140
|
-
|
141
|
-
content = div / 'div.scheda_content'
|
142
|
-
title_link = (content / :a).first
|
143
|
-
title = normalize(title_link.inner_text)
|
144
|
-
link_to_description = title_link['href']
|
145
|
-
lookup_url = "#{SITE}#{link_to_description}"
|
146
|
-
|
147
|
-
authors = []
|
148
|
-
(content / 'a.info').each do |link|
|
149
|
-
authors << normalize(link.inner_text)
|
150
|
-
end
|
151
|
-
|
152
|
-
result = {}
|
153
|
-
result[:author] = authors.first # HACK, what about multiple authors
|
154
|
-
result[:title] = title
|
155
|
-
result[:url] = lookup_url
|
156
|
-
|
157
|
-
publishers = (content / 'p.editore')
|
158
|
-
result[:publisher] = normalize(publishers.first.inner_text) unless publishers.empty?
|
159
|
-
|
160
|
-
book_search_results << result
|
161
|
-
rescue => ex
|
162
|
-
trace = ex.backtrace.join("\n> ")
|
163
|
-
log.error { "Failed parsing DeaStore search page #{ex.message}\n#{trace}" }
|
164
|
-
end
|
165
|
-
end
|
166
|
-
book_search_results
|
167
|
-
end
|
168
|
-
|
169
|
-
def parse_result_data(html)
|
170
|
-
doc = html_to_doc(html)
|
171
|
-
data = doc % 'div#dati_scheda'
|
172
|
-
# sotto_data_hdr = doc % 'div.sotto_schede/h1.titolo_sotto[text()*="Informazioni generali"]/..'
|
173
|
-
# title
|
174
|
-
title_span = data % 'h1.titolo_scheda'
|
175
|
-
title = normalize(title_span.inner_text)
|
176
|
-
# cover
|
177
|
-
cover_link = nil
|
178
|
-
cover_img = data / 'a/img'
|
179
|
-
cover_link = cover_img.first['src'] unless cover_img.empty?
|
180
|
-
# author(s)
|
181
|
-
authors = []
|
182
|
-
author_span = data % 'span.int_scheda[text()*=Autore]'
|
183
|
-
author_span ||= data % 'span.int_scheda[text()*=cura]'
|
184
|
-
if author_span
|
185
|
-
author_links = author_span / 'a.info'
|
186
|
-
authors = []
|
187
|
-
author_links.each do |link|
|
188
|
-
authors << normalize(link.inner_html)
|
189
|
-
end
|
190
|
-
end
|
191
|
-
# if author_span
|
192
|
-
# author_links = author_span/'a.info'
|
193
|
-
# author_links.each do |link|
|
194
|
-
# authors << normalize(link.inner_text)
|
195
|
-
# end
|
196
|
-
# end
|
197
|
-
# publisher
|
198
|
-
publisher_par = data % 'span.int_scheda[text()*=Editore]/..'
|
199
|
-
publisher_link = publisher_par % 'a.info'
|
200
|
-
publisher = normalize(publisher_link.inner_text)
|
201
|
-
# skip 'Collana', (ummm, possibly genre information, Babelfish
|
202
|
-
# says "Necklace")
|
203
|
-
# format
|
204
|
-
format_par = data % 'span.int_scheda[text()*=Formato]/..'
|
205
|
-
format_par.inner_text =~ /:[\s]*(.+)[\s]*$/
|
206
|
-
binding = normalize(Regexp.last_match[1])
|
207
|
-
# year
|
208
|
-
date_par = data % 'span.int_scheda[text()*=Data di pubblicazione]/..'
|
209
|
-
date_par.inner_text =~ /:[\s]*([12][0-9]{3})[\s]*$/
|
210
|
-
publish_year = nil
|
211
|
-
publish_year = Regexp.last_match[1].to_i if Regexp.last_match[1]
|
212
|
-
isbn_spans = data / 'div.sotto/span.isbn'
|
213
|
-
isbns = []
|
214
|
-
isbn_spans.each do |span|
|
215
|
-
span.inner_text =~ /:[\s]*(.+)[\s]*$/
|
216
|
-
isbns << Regexp.last_match[1]
|
217
|
-
end
|
218
|
-
isbn = nil
|
219
|
-
isbn = Library.canonicalise_isbn(isbns.first) unless isbns.empty?
|
220
|
-
# Editore & Imprint : as publisher info above...
|
221
|
-
# pages
|
222
|
-
# page_par = data % 'span.int_scheda[text()*=Pagine]/..'
|
223
|
-
# if page_par
|
224
|
-
# page_par.inner_text =~ /:[\s]*([0-9]+)[\s]*$/
|
225
|
-
# pages = $1.to_i
|
226
|
-
# end
|
227
|
-
# synopsis_div = doc % 'div.sotto_schede' # exclude the first span though
|
228
|
-
# book = Book.new(title, isbns.first, authors)
|
229
|
-
# if publisher
|
230
|
-
# book.publisher = Publisher.new(publisher)
|
231
|
-
# end
|
232
|
-
# if format
|
233
|
-
# book.binding = CoverBinding.new(format, binding_type(format))
|
234
|
-
# end
|
235
|
-
# cover
|
236
|
-
image_url = nil
|
237
|
-
if cover_link
|
238
|
-
image_url = if cover_link =~ /^http/
|
239
|
-
# e.g. http://images.btol.com/ContentCafe/Jacket.aspx?\
|
240
|
-
# Return=1&Type=M&Value=9788873641803&password=\
|
241
|
-
# CC70580&userID=DEA40305
|
242
|
-
# seems not to work, or to be blank anyway, so set to nil
|
243
|
-
nil
|
244
|
-
elsif cover_link[0..0] != '/'
|
245
|
-
"#{SITE}/#{cover_link}"
|
246
|
-
else
|
247
|
-
"#{SITE}#{cover_link}"
|
248
|
-
end
|
249
|
-
log.debug { "Cover Image URL:: #{image_url}" }
|
250
|
-
end
|
251
|
-
book = Book.new(title, authors, isbn, publisher, publish_year, binding)
|
252
|
-
[book, image_url]
|
253
|
-
rescue => ex
|
254
|
-
trace = ex.backtrace.join("\n> ")
|
255
|
-
log.error { "Failed parsing DeaStore product page #{ex.message}\n#{trace}" }
|
256
|
-
nil
|
257
|
-
end
|
258
|
-
|
259
|
-
def normalize(str)
|
260
|
-
str = str.squeeze(' ').strip unless str.nil?
|
261
|
-
str
|
262
|
-
end
|
263
|
-
end
|
264
|
-
end
|
265
|
-
end
|
@@ -1,182 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# Copyright (C) 2004 Javier Fernandez-Sanguino
|
4
|
-
# Copyright (C) 2007 Javier Fernandez-Sanguino and Marco Costantini
|
5
|
-
# Copyright (C) 2011, 2016 Matijs van Zuijlen
|
6
|
-
#
|
7
|
-
# Alexandria is free software; you can redistribute it and/or
|
8
|
-
# modify it under the terms of the GNU General Public License as
|
9
|
-
# published by the Free Software Foundation; either version 2 of the
|
10
|
-
# License, or (at your option) any later version.
|
11
|
-
#
|
12
|
-
# Alexandria is distributed in the hope that it will be useful,
|
13
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15
|
-
# General Public License for more details.
|
16
|
-
#
|
17
|
-
# You should have received a copy of the GNU General Public
|
18
|
-
# License along with Alexandria; see the file COPYING. If not,
|
19
|
-
# write to the Free Software Foundation, Inc., 51 Franklin Street,
|
20
|
-
# Fifth Floor, Boston, MA 02110-1301 USA.
|
21
|
-
|
22
|
-
require 'cgi'
|
23
|
-
require 'net/http'
|
24
|
-
|
25
|
-
# http://www.mcu.es/libro/CE/AgenciaISBN/BBDDLibros/Sobre.html
|
26
|
-
# http://www.mcu.es/comun/bases/isbn/ISBN.html
|
27
|
-
|
28
|
-
module Alexandria
|
29
|
-
class BookProviders
|
30
|
-
class MCUProvider < GenericProvider
|
31
|
-
include Logging
|
32
|
-
include GetText
|
33
|
-
GetText.bindtextdomain(Alexandria::TEXTDOMAIN, charset: 'UTF-8')
|
34
|
-
|
35
|
-
LANGUAGES = {
|
36
|
-
'es' => '1'
|
37
|
-
}.freeze
|
38
|
-
|
39
|
-
# BASE_URI = "http://www.mcu.es/cgi-bin/BRSCGI3701?"
|
40
|
-
BASE_URI = 'http://www.mcu.es/cgi-brs/BasesHTML/isbn/BRSCGI?'
|
41
|
-
def initialize
|
42
|
-
super('MCU', _('Spanish Culture Ministry'))
|
43
|
-
# No preferences
|
44
|
-
prefs.read
|
45
|
-
end
|
46
|
-
|
47
|
-
def search(criterion, type)
|
48
|
-
prefs.read
|
49
|
-
criterion = criterion.encode('ISO-8859-1') # still needed??
|
50
|
-
print "Doing search with MCU #{criterion}, type: #{type}\n" if $DEBUG # for DEBUGing
|
51
|
-
req = BASE_URI +
|
52
|
-
'CMD=VERLST&BASE=ISBN&DOCS=1-15&CONF=AEISPA.cnf&OPDEF=AND&DOCS=1-1000&SEPARADOR=&'
|
53
|
-
req += case type
|
54
|
-
when SEARCH_BY_ISBN
|
55
|
-
"WGEN-C=&WISB-C=#{CGI.escape(criterion)}&WAUT-C=&WTIT-C="
|
56
|
-
|
57
|
-
when SEARCH_BY_TITLE
|
58
|
-
"WGEN-C=&WISB-C=&WAUT-C=&WTIT-C=#{CGI.escape(criterion)}"
|
59
|
-
|
60
|
-
when SEARCH_BY_AUTHORS
|
61
|
-
"WGEN-C=&WISB-C=&WAUT-C=#{CGI.escape(criterion)}&WTIT-C="
|
62
|
-
|
63
|
-
when SEARCH_BY_KEYWORD
|
64
|
-
"WGEN-C=#{CGI.escape(criterion)}&WISB-C=&WAUT-C=&WTIT-C="
|
65
|
-
|
66
|
-
else
|
67
|
-
raise InvalidSearchTypeError
|
68
|
-
end
|
69
|
-
req +=
|
70
|
-
'&WMAT-C=&WEDI-C=&WFEP-C=&%40T353-GE=&%40T353-LE=&WSER-C=&WLUG-C=' \
|
71
|
-
'&WDIS-C=%28DISPONIBLE+or+AGOTADO%29&WLEN-C=&WCLA-C=&WSOP-C='
|
72
|
-
products = {}
|
73
|
-
print "Request page is #{req}\n" if $DEBUG # for DEBUGing
|
74
|
-
transport.get(URI.parse(req)).each do |line|
|
75
|
-
print "Reading line: #{line}" if $DEBUG # for DEBUGing
|
76
|
-
next unless line =~ /CMD=VERDOC.*&DOCN=([^&]*)&NDOC=([^&]*)/
|
77
|
-
next if products[Regexp.last_match[1]]
|
78
|
-
next unless (book = parseBook(Regexp.last_match[1], Regexp.last_match[2]))
|
79
|
-
products[Regexp.last_match[1]] = book
|
80
|
-
puts Regexp.last_match[1] if $DEBUG # for DEBUGing
|
81
|
-
end
|
82
|
-
|
83
|
-
raise NoResultsError if products.values.empty?
|
84
|
-
type == SEARCH_BY_ISBN ? products.values.first : products.values
|
85
|
-
end
|
86
|
-
|
87
|
-
def url(book)
|
88
|
-
isbn = Library.canonicalise_isbn(book.isbn)
|
89
|
-
'http://www.mcu.es/cgi-brs/BasesHTML/isbn/BRSCGI?CMD=VERLST&BASE=ISBN&DOCS=1-15' \
|
90
|
-
"&CONF=AEISPA.cnf&OPDEF=AND&DOCS=1&SEPARADOR=&WGEN-C=&WISB-C=#{isbn}&WAUT-C=&WTIT-C=" \
|
91
|
-
'&WMAT-C=&WEDI-C=&WFEP-C=&%40T353-GE=&%40T353-LE=&WSER-C=&WLUG-C=' \
|
92
|
-
'&WDIS-C=%28DISPONIBLE+or+AGOTADO%29&WLEN-C=&WCLA-C=&WSOP-C='
|
93
|
-
rescue => ex
|
94
|
-
log.warn { "Cannot create url for book #{book}; #{ex.message}" }
|
95
|
-
nil
|
96
|
-
end
|
97
|
-
|
98
|
-
private
|
99
|
-
|
100
|
-
def parseBook(docn, ndoc)
|
101
|
-
detailspage =
|
102
|
-
'http://www.mcu.es/cgi-brs/BasesHTML/isbn/BRSCGI?' \
|
103
|
-
'CMD=VERDOC&CONF=AEISPA.cnf&BASE=ISBN&DOCN=' + docn + '&NDOC=' + ndoc
|
104
|
-
print "Looking at detailspage: #{detailspage}\n" if $DEBUG # for DEBUGing
|
105
|
-
product = {}
|
106
|
-
product['authors'] = []
|
107
|
-
robotstate = 0
|
108
|
-
transport.get(URI.parse(detailspage)).each do |line|
|
109
|
-
# This is a very crude robot interpreter
|
110
|
-
# Note that the server provides more information
|
111
|
-
# we don't store:
|
112
|
-
# - Language - Description
|
113
|
-
# - Binding - Price
|
114
|
-
# - Colection - Theme
|
115
|
-
# - CDU - Last update
|
116
|
-
|
117
|
-
# There seems to be an issue with accented chars..
|
118
|
-
line = line.encode('UTF-8')
|
119
|
-
print "Reading line (robotstate #{robotstate}): #{line}" if $DEBUG # for DEBUGing
|
120
|
-
if line =~ /^<\/td>$/ || line =~ /^<\/tr>$/
|
121
|
-
robotstate = 0
|
122
|
-
elsif (robotstate == 1) && line =~ /^([^<]+)</
|
123
|
-
author = Regexp.last_match[1].gsub(' ', ' ').sub(/ +$/, '')
|
124
|
-
if author.length > 3
|
125
|
-
# Only add authors of appropiate length
|
126
|
-
product['authors'] << author
|
127
|
-
print "Authors are #{product['authors']}\n" if $DEBUG # for DEBUGing
|
128
|
-
robotstate = 0
|
129
|
-
end
|
130
|
-
elsif (robotstate == 2) && line =~ /^(.*)$/
|
131
|
-
# The title es the next line to title declaration and has not tags on web src code
|
132
|
-
product['name'] = Regexp.last_match[1].strip
|
133
|
-
print "Name is #{product['name']}\n" if $DEBUG # for DEBUGing
|
134
|
-
robotstate = 0
|
135
|
-
elsif (robotstate == 3) && line =~ /^([0-9]+-[0-9]+-[0-9]+-[0-9]+-[0-9]).*/
|
136
|
-
product['isbn'] = Regexp.last_match[1]
|
137
|
-
print "ISBN is #{product['isbn']}\n" if $DEBUG # for DEBUGing
|
138
|
-
robotstate = 0
|
139
|
-
elsif (robotstate == 4) && line =~ /^([^<]+)</
|
140
|
-
product['manufacturer'] = Regexp.last_match[1].strip
|
141
|
-
print "Manufacturer is #{product['manufacturer']}\n" if $DEBUG # for DEBUGing
|
142
|
-
robotstate = 0
|
143
|
-
# elsif robotstate == 5 and line =~ /^([^<]+)</
|
144
|
-
elsif (robotstate == 5) && line =~ /<span>([^<]+)</
|
145
|
-
product['media'] = Regexp.last_match[1].strip
|
146
|
-
print "Media is #{product['media']}\n" if $DEBUG # for DEBUGing
|
147
|
-
robotstate = 0
|
148
|
-
elsif line =~ /^.*>Autor:\s*</
|
149
|
-
robotstate = 1
|
150
|
-
elsif line =~ /^.*>T(.|í)tulo:\s*</
|
151
|
-
robotstate = 2
|
152
|
-
elsif line =~ /^.*>ISBN \(13\):\s*</
|
153
|
-
robotstate = 3
|
154
|
-
elsif line =~ /^.*>Publicaci(.|ó)n:\s*</
|
155
|
-
robotstate = 4
|
156
|
-
elsif line =~ /^.*>Encuadernaci(.|ó)n:\s*</
|
157
|
-
robotstate = 5
|
158
|
-
end
|
159
|
-
end
|
160
|
-
|
161
|
-
# TODO: This provider does not include picture for books
|
162
|
-
# %w{name isbn media manufacturer}.each do |field|
|
163
|
-
# print "Checking #{field} for nil\n" if $DEBUG # for DEBUGing
|
164
|
-
# product[field]="" if product[field].nil?
|
165
|
-
# end
|
166
|
-
|
167
|
-
print "Creating new book\n" if $DEBUG # for DEBUGing
|
168
|
-
book = Book.new(product['name'],
|
169
|
-
product['authors'],
|
170
|
-
product['isbn'].delete('-'),
|
171
|
-
product['manufacturer'],
|
172
|
-
nil, # TODO: furnish publish year
|
173
|
-
product['media'])
|
174
|
-
if book.title.nil?
|
175
|
-
log.warn { "No title was returned for #{book.isbn}" }
|
176
|
-
book.title = ''
|
177
|
-
end
|
178
|
-
[book]
|
179
|
-
end
|
180
|
-
end
|
181
|
-
end
|
182
|
-
end
|