alexandria-book-collection-manager 0.7.3 → 0.7.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +28 -25
- data/.rubocop_todo.yml +87 -67
- data/CHANGELOG.md +12 -1
- data/Gemfile +4 -3
- data/README.md +16 -6
- data/Rakefile +71 -72
- data/alexandria-book-collection-manager.gemspec +44 -44
- data/bin/alexandria +12 -12
- data/lib/alexandria.rb +22 -22
- data/lib/alexandria/about.rb +50 -50
- data/lib/alexandria/book_providers.rb +50 -50
- data/lib/alexandria/book_providers/adlibris.rb +28 -44
- data/lib/alexandria/book_providers/amazon_aws.rb +64 -64
- data/lib/alexandria/book_providers/amazon_ecs_util.rb +52 -78
- data/lib/alexandria/book_providers/barnes_and_noble.rb +34 -34
- data/lib/alexandria/book_providers/douban.rb +21 -37
- data/lib/alexandria/book_providers/proxis.rb +24 -24
- data/lib/alexandria/book_providers/pseudomarc.rb +19 -19
- data/lib/alexandria/book_providers/renaud.rb +44 -57
- data/lib/alexandria/book_providers/siciliano.rb +39 -39
- data/lib/alexandria/book_providers/thalia.rb +33 -33
- data/lib/alexandria/book_providers/web.rb +5 -5
- data/lib/alexandria/book_providers/worldcat.rb +44 -69
- data/lib/alexandria/book_providers/z3950.rb +94 -109
- data/lib/alexandria/config.rb +1 -1
- data/lib/alexandria/console.rb +3 -3
- data/lib/alexandria/export_format.rb +8 -8
- data/lib/alexandria/export_library.rb +112 -113
- data/lib/alexandria/import_library.rb +45 -45
- data/lib/alexandria/import_library_csv.rb +30 -30
- data/lib/alexandria/library_collection.rb +4 -4
- data/lib/alexandria/library_sort_order.rb +1 -1
- data/lib/alexandria/library_store.rb +14 -14
- data/lib/alexandria/logging.rb +5 -5
- data/lib/alexandria/models/book.rb +1 -1
- data/lib/alexandria/models/library.rb +36 -36
- data/lib/alexandria/net.rb +5 -5
- data/lib/alexandria/preferences.rb +32 -32
- data/lib/alexandria/scanners/{cuecat.rb → cue_cat.rb} +13 -13
- data/lib/alexandria/scanners/keyboard.rb +5 -5
- data/lib/alexandria/smart_library.rb +53 -53
- data/lib/alexandria/ui.rb +15 -15
- data/lib/alexandria/ui/{dialogs/about_dialog.rb → about_dialog.rb} +1 -1
- data/lib/alexandria/ui/{dialogs/acquire_dialog.rb → acquire_dialog.rb} +66 -65
- data/lib/alexandria/ui/{dialogs/alert_dialog.rb → alert_dialog.rb} +1 -16
- data/lib/alexandria/ui/{dialogs/bad_isbns_dialog.rb → bad_isbns_dialog.rb} +0 -0
- data/lib/alexandria/ui/{dialogs/barcode_animation.rb → barcode_animation.rb} +7 -7
- data/lib/alexandria/ui/{dialogs/book_properties_dialog.rb → book_properties_dialog.rb} +25 -37
- data/lib/alexandria/ui/{dialogs/book_properties_dialog_base.rb → book_properties_dialog_base.rb} +38 -37
- data/lib/alexandria/ui/builder_base.rb +1 -1
- data/lib/alexandria/ui/callbacks.rb +95 -91
- data/lib/alexandria/ui/completion_models.rb +7 -21
- data/lib/alexandria/ui/confirm_erase_dialog.rb +33 -0
- data/lib/alexandria/ui/conflict_while_copying_dialog.rb +34 -0
- data/lib/alexandria/ui/dndable.rb +7 -7
- data/lib/alexandria/ui/error_dialog.rb +25 -0
- data/lib/alexandria/ui/{dialogs/export_dialog.rb → export_dialog.rb} +22 -42
- data/lib/alexandria/ui/icons.rb +6 -6
- data/lib/alexandria/ui/iconview.rb +7 -7
- data/lib/alexandria/ui/iconview_tooltips.rb +6 -6
- data/lib/alexandria/ui/{dialogs/import_dialog.rb → import_dialog.rb} +14 -32
- data/lib/alexandria/ui/init.rb +16 -29
- data/lib/alexandria/ui/{dialogs/keep_bad_isbn_dialog.rb → keep_bad_isbn_dialog.rb} +6 -4
- data/lib/alexandria/ui/libraries_combo.rb +7 -7
- data/lib/alexandria/ui/listview.rb +40 -40
- data/lib/alexandria/ui/main_app.rb +22 -24
- data/lib/alexandria/ui/misc_dialogs.rb +10 -0
- data/lib/alexandria/ui/multi_drag_treeview.rb +4 -4
- data/lib/alexandria/ui/{dialogs/new_book_dialog.rb → new_book_dialog.rb} +46 -45
- data/lib/alexandria/ui/{dialogs/new_book_dialog_manual.rb → new_book_dialog_manual.rb} +20 -19
- data/lib/alexandria/ui/new_provider_dialog.rb +99 -0
- data/lib/alexandria/ui/{dialogs/new_smart_library_dialog.rb → new_smart_library_dialog.rb} +4 -4
- data/lib/alexandria/ui/{dialogs/preferences_dialog.rb → preferences_dialog.rb} +44 -235
- data/lib/alexandria/ui/provider_preferences_base_dialog.rb +90 -0
- data/lib/alexandria/ui/provider_preferences_dialog.rb +35 -0
- data/lib/alexandria/ui/{dialogs/misc_dialogs.rb → really_delete_dialog.rb} +6 -27
- data/lib/alexandria/ui/{sidepane.rb → sidepane_manager.rb} +27 -25
- data/lib/alexandria/ui/skip_entry_dialog.rb +32 -0
- data/lib/alexandria/ui/{dialogs/smart_library_properties_dialog.rb → smart_library_properties_dialog.rb} +2 -2
- data/lib/alexandria/ui/{dialogs/smart_library_properties_dialog_base.rb → smart_library_properties_dialog_base.rb} +30 -30
- data/lib/alexandria/ui/sound.rb +8 -8
- data/lib/alexandria/ui/ui_manager.rb +136 -135
- data/lib/alexandria/version.rb +4 -19
- data/lib/alexandria/web_themes.rb +8 -8
- data/po/cs.po +97 -97
- data/po/cy.po +97 -97
- data/po/de.po +97 -97
- data/po/el.po +97 -97
- data/po/es.po +97 -97
- data/po/fr.po +97 -97
- data/po/ga.po +97 -97
- data/po/gl.po +97 -97
- data/po/it.po +97 -97
- data/po/ja.po +97 -97
- data/po/mk.po +97 -97
- data/po/nb.po +97 -97
- data/po/nl.po +97 -97
- data/po/pl.po +97 -97
- data/po/pt.po +97 -97
- data/po/pt_BR.po +97 -97
- data/po/ru.po +97 -97
- data/po/sk.po +97 -97
- data/po/sv.po +97 -97
- data/po/uk.po +97 -97
- data/po/zh_TW.po +97 -97
- data/schemas/alexandria.schemas +24 -2
- data/spec/alexandria/book_providers_spec.rb +65 -82
- data/spec/alexandria/book_spec.rb +12 -10
- data/spec/alexandria/console_spec.rb +9 -9
- data/spec/alexandria/export_library_spec.rb +31 -31
- data/spec/alexandria/library_spec.rb +86 -86
- data/spec/alexandria/library_store_spec.rb +8 -8
- data/spec/alexandria/preferences_spec.rb +18 -17
- data/spec/alexandria/scanners/cue_cat_spec.rb +52 -0
- data/spec/alexandria/smart_library_spec.rb +15 -15
- data/spec/alexandria/ui/about_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/acquire_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/alert_dialog_spec.rb +16 -0
- data/spec/alexandria/ui/bad_isbns_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/book_properties_dialog_spec.rb +17 -0
- data/spec/alexandria/ui/confirm_erase_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/conflict_while_copying_dialog_spec.rb +16 -0
- data/spec/alexandria/ui/error_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/export_dialog_spec.rb +15 -0
- data/spec/alexandria/ui/iconview_spec.rb +7 -21
- data/spec/alexandria/ui/import_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/keep_bad_isbn_dialog_spec.rb +17 -0
- data/spec/alexandria/ui/main_app_spec.rb +6 -6
- data/spec/alexandria/ui/new_book_dialog_manual_spec.rb +15 -0
- data/spec/alexandria/ui/{dialogs/new_book_dialog_spec.rb → new_book_dialog_spec.rb} +4 -4
- data/spec/alexandria/ui/new_provider_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/new_smart_library_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/preferences_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/provider_preferences_dialog_spec.rb +19 -0
- data/spec/alexandria/ui/really_delete_dialog_spec.rb +15 -0
- data/spec/alexandria/ui/sidepane_manager_spec.rb +15 -0
- data/spec/alexandria/ui/skip_entry_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/smart_library_properties_dialog_spec.rb +18 -0
- data/spec/alexandria/ui/sound_spec.rb +2 -2
- data/spec/alexandria/ui/ui_manager_spec.rb +6 -20
- data/spec/alexandria/ui/ui_utilities_spec.rb +9 -9
- data/spec/alexandria/utilities_spec.rb +6 -6
- data/spec/end_to_end/basic_run_spec.rb +24 -36
- data/spec/spec_helper.rb +9 -9
- data/tasks/dogtail.rake +1 -1
- data/tasks/setup.rb +2 -2
- data/tasks/spec.rake +11 -11
- data/util/rake/fileinstall.rb +25 -25
- data/util/rake/gettextgenerate.rb +7 -7
- data/util/rake/omfgenerate.rb +7 -7
- metadata +59 -33
- data/spec/alexandria/scanners/cuecat_spec.rb +0 -67
- data/spec/alexandria/ui/dialogs_spec.rb +0 -162
- data/spec/alexandria/ui/sidepane_spec.rb +0 -29
@@ -25,27 +25,27 @@
|
|
25
25
|
# Almost completely rewritten by Cathal Mc Ginley (21 Feb 2009)
|
26
26
|
# based on the new code for Palatina
|
27
27
|
|
28
|
-
require
|
29
|
-
require
|
30
|
-
require
|
28
|
+
require "net/http"
|
29
|
+
require "cgi"
|
30
|
+
require "alexandria/book_providers/web"
|
31
31
|
|
32
32
|
module Alexandria
|
33
33
|
class BookProviders
|
34
34
|
class SicilianoProvider < WebsiteBasedProvider
|
35
35
|
include Logging
|
36
36
|
|
37
|
-
SITE =
|
37
|
+
SITE = "http://www.siciliano.com.br"
|
38
38
|
|
39
39
|
# The string interpolations in this URL are the search term and search
|
40
40
|
# type, respectively.
|
41
41
|
BASE_SEARCH_URL = "#{SITE}/pesquisaweb/pesquisaweb.dll/pesquisa?" \
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
42
|
+
"&FIL_ID=102" \
|
43
|
+
"&PALAVRASN1=%s" \
|
44
|
+
"&FILTRON1=%s" \
|
45
|
+
"&ESTRUTN1=0301&ORDEMN2=E"
|
46
46
|
|
47
47
|
def initialize
|
48
|
-
super(
|
48
|
+
super("Siciliano", "Livraria Siciliano (Brasil)")
|
49
49
|
# no preferences for the moment
|
50
50
|
prefs.read
|
51
51
|
end
|
@@ -57,7 +57,7 @@ module Alexandria
|
|
57
57
|
end
|
58
58
|
|
59
59
|
def search(criterion, type)
|
60
|
-
criterion = criterion.encode(
|
60
|
+
criterion = criterion.encode("ISO-8859-1") # still needed??
|
61
61
|
trying_again = false
|
62
62
|
begin
|
63
63
|
req = create_search_uri(type, criterion, trying_again)
|
@@ -71,12 +71,12 @@ module Alexandria
|
|
71
71
|
else
|
72
72
|
results.map { |result| get_book_from_search_result(result) }
|
73
73
|
end
|
74
|
-
rescue NoResultsError =>
|
74
|
+
rescue NoResultsError => ex
|
75
75
|
if (type == SEARCH_BY_ISBN) && (trying_again == false)
|
76
76
|
trying_again = true
|
77
77
|
retry
|
78
78
|
else
|
79
|
-
raise
|
79
|
+
raise ex
|
80
80
|
end
|
81
81
|
end
|
82
82
|
end
|
@@ -90,10 +90,10 @@ module Alexandria
|
|
90
90
|
private
|
91
91
|
|
92
92
|
def create_search_uri(search_type, search_term, trying_again = false)
|
93
|
-
(search_type_code = { SEARCH_BY_ISBN =>
|
94
|
-
SEARCH_BY_TITLE =>
|
95
|
-
SEARCH_BY_AUTHORS =>
|
96
|
-
SEARCH_BY_KEYWORD =>
|
93
|
+
(search_type_code = { SEARCH_BY_ISBN => "G",
|
94
|
+
SEARCH_BY_TITLE => "A",
|
95
|
+
SEARCH_BY_AUTHORS => "B",
|
96
|
+
SEARCH_BY_KEYWORD => "X" }[search_type]) || "X"
|
97
97
|
search_term_encoded = if search_type == SEARCH_BY_ISBN
|
98
98
|
if trying_again
|
99
99
|
# on second attempt, try ISBN-10...
|
@@ -123,28 +123,28 @@ module Alexandria
|
|
123
123
|
book_search_results = []
|
124
124
|
# each result will be a dict with keys :title, :author, :publisher, :url
|
125
125
|
|
126
|
-
list_items = doc.search(
|
126
|
+
list_items = doc.search("div.pesquisa-item-lista-conteudo")
|
127
127
|
list_items.each do |item|
|
128
128
|
begin
|
129
129
|
result = {}
|
130
130
|
|
131
131
|
# author & publisher
|
132
|
-
author_publisher =
|
132
|
+
author_publisher = ""
|
133
133
|
item.children.each do |node|
|
134
134
|
author_publisher += node.to_s if node.text?
|
135
135
|
author_publisher.strip!
|
136
136
|
break unless author_publisher.empty?
|
137
137
|
end
|
138
|
-
author, publisher = author_publisher.split(
|
138
|
+
author, publisher = author_publisher.split("/")
|
139
139
|
result[:author] = author.strip if author
|
140
140
|
result[:publisher] = publisher.strip if publisher
|
141
141
|
|
142
142
|
# title & url
|
143
|
-
link = item %
|
143
|
+
link = item % "a"
|
144
144
|
result[:title] = link.inner_text.strip
|
145
|
-
link_to_description = link[
|
146
|
-
slash =
|
147
|
-
slash =
|
145
|
+
link_to_description = link["href"]
|
146
|
+
slash = ""
|
147
|
+
slash = "/" unless %r{^/}.match?(link_to_description)
|
148
148
|
result[:url] = "#{SITE}#{slash}#{link_to_description}"
|
149
149
|
|
150
150
|
book_search_results << result
|
@@ -161,38 +161,38 @@ module Alexandria
|
|
161
161
|
# checked against Siciliano website 21 Feb 2009
|
162
162
|
doc = html_to_doc(html)
|
163
163
|
# title
|
164
|
-
title_div = doc %
|
164
|
+
title_div = doc % "div#conteudo//div.titulo"
|
165
165
|
raise NoResultsError unless title_div
|
166
166
|
|
167
|
-
title_h = title_div %
|
167
|
+
title_h = title_div % "h2"
|
168
168
|
title = title_h.inner_text if title_h
|
169
169
|
# title = first_non_empty_text_node(title_div)
|
170
170
|
# author_spans = doc/'span.rotulo'
|
171
|
-
author_hs = title_div /
|
171
|
+
author_hs = title_div / "h3.autor"
|
172
172
|
authors = []
|
173
173
|
author_hs.each do |h|
|
174
174
|
authors << h.inner_text.strip
|
175
175
|
end
|
176
176
|
## synopsis_div = doc % 'div#sinopse'
|
177
|
-
details_div = doc %
|
177
|
+
details_div = doc % "div#tab-caracteristica"
|
178
178
|
details = string_array_to_map(lines_of_text_as_array(details_div))
|
179
179
|
# ISBN
|
180
|
-
isbn = details[
|
180
|
+
isbn = details["ISBN"]
|
181
181
|
## ean = details["CdBarras"]
|
182
|
-
translator = details[
|
182
|
+
translator = details["Tradutor"]
|
183
183
|
authors << translator if translator
|
184
|
-
binding = details[
|
184
|
+
binding = details["Acabamento"]
|
185
185
|
publisher = search_result[:publisher]
|
186
186
|
# publish year
|
187
187
|
publish_year = nil
|
188
|
-
edition = details[
|
188
|
+
edition = details["Edio"]
|
189
189
|
if edition
|
190
190
|
publish_year = Regexp.last_match[1].to_i if edition =~ /([12][0-9]{3})/ # publication date
|
191
191
|
end
|
192
192
|
# cover
|
193
193
|
# ImgSrc[1]="/imagem/imagem.dll?pro_id=1386929&PIM_Id=658849";
|
194
194
|
image_urls = []
|
195
|
-
(doc /
|
195
|
+
(doc / "script").each do |script|
|
196
196
|
next if script.children.nil?
|
197
197
|
|
198
198
|
script.children.each do |ch|
|
@@ -213,7 +213,7 @@ module Alexandria
|
|
213
213
|
end
|
214
214
|
|
215
215
|
def first_non_empty_text_node(elem)
|
216
|
-
text =
|
216
|
+
text = ""
|
217
217
|
elem.children.each do |node|
|
218
218
|
next unless node.text?
|
219
219
|
|
@@ -225,28 +225,28 @@ module Alexandria
|
|
225
225
|
|
226
226
|
def lines_of_text_as_array(elem)
|
227
227
|
lines = []
|
228
|
-
current_text =
|
228
|
+
current_text = ""
|
229
229
|
elem.children.each do |e|
|
230
230
|
if e.text?
|
231
231
|
current_text += e.to_s
|
232
|
-
elsif e.name ==
|
232
|
+
elsif e.name == "br"
|
233
233
|
lines << current_text.strip
|
234
|
-
current_text =
|
234
|
+
current_text = ""
|
235
235
|
else
|
236
236
|
current_text += e.inner_text
|
237
237
|
end
|
238
238
|
end
|
239
239
|
lines << current_text.strip
|
240
|
-
lines.delete(
|
240
|
+
lines.delete("")
|
241
241
|
lines
|
242
242
|
end
|
243
243
|
|
244
244
|
def string_array_to_map(arr)
|
245
245
|
map = {}
|
246
246
|
arr.each do |str|
|
247
|
-
key, val = str.split(
|
247
|
+
key, val = str.split(":")
|
248
248
|
# a real hack for not handling encoding properly :^)
|
249
|
-
map[key.gsub(/[^a-zA-Z]/,
|
249
|
+
map[key.gsub(/[^a-zA-Z]/, "")] = val.strip if val
|
250
250
|
end
|
251
251
|
map
|
252
252
|
end
|
@@ -24,20 +24,20 @@
|
|
24
24
|
# New Tlalia provider, taken from Palatina MetaDataSource and modified
|
25
25
|
# for Alexandria. (21 Dec 2009)
|
26
26
|
|
27
|
-
require
|
28
|
-
require
|
29
|
-
require
|
27
|
+
require "net/http"
|
28
|
+
require "cgi"
|
29
|
+
require "alexandria/book_providers/web"
|
30
30
|
|
31
31
|
module Alexandria
|
32
32
|
class BookProviders
|
33
33
|
class ThaliaProvider < WebsiteBasedProvider
|
34
34
|
include Alexandria::Logging
|
35
35
|
|
36
|
-
SITE =
|
36
|
+
SITE = "http://www.thalia.de"
|
37
37
|
BASE_SEARCH_URL = "#{SITE}/shop/bde_bu_hg_startseite/suche/?%s=%s" # type,term
|
38
38
|
|
39
39
|
def initialize
|
40
|
-
super(
|
40
|
+
super("Thalia", "Thalia (Germany)")
|
41
41
|
# no preferences for the moment
|
42
42
|
prefs.read
|
43
43
|
end
|
@@ -62,11 +62,11 @@ module Alexandria
|
|
62
62
|
|
63
63
|
def create_search_uri(search_type, search_term)
|
64
64
|
(search_type_code = {
|
65
|
-
SEARCH_BY_ISBN =>
|
66
|
-
SEARCH_BY_AUTHORS =>
|
67
|
-
SEARCH_BY_TITLE =>
|
68
|
-
SEARCH_BY_KEYWORD =>
|
69
|
-
}[search_type]) ||
|
65
|
+
SEARCH_BY_ISBN => "sq",
|
66
|
+
SEARCH_BY_AUTHORS => "sa", # Autor
|
67
|
+
SEARCH_BY_TITLE => "st", # Titel
|
68
|
+
SEARCH_BY_KEYWORD => "ssw" # Schlagwort
|
69
|
+
}[search_type]) || ""
|
70
70
|
search_type_code = CGI.escape(search_type_code)
|
71
71
|
search_term_encoded = if search_type == SEARCH_BY_ISBN
|
72
72
|
# search_term_encoded = search_term.as_isbn_13
|
@@ -80,12 +80,12 @@ module Alexandria
|
|
80
80
|
def parse_search_result_data(html)
|
81
81
|
doc = html_to_doc(html)
|
82
82
|
book_search_results = []
|
83
|
-
results_divs = doc /
|
83
|
+
results_divs = doc / "div.articlePresentationSearchCH"
|
84
84
|
results_divs.each do |div|
|
85
85
|
result = {}
|
86
|
-
title_link = div %
|
86
|
+
title_link = div % "div.articleText/h2/a"
|
87
87
|
result[:title] = title_link.inner_html
|
88
|
-
result[:lookup_url] = title_link[
|
88
|
+
result[:lookup_url] = title_link["href"]
|
89
89
|
book_search_results << result
|
90
90
|
end
|
91
91
|
book_search_results
|
@@ -94,26 +94,26 @@ module Alexandria
|
|
94
94
|
def data_from_label(node, label_text)
|
95
95
|
label_node = node % "strong[text()*='#{label_text}']"
|
96
96
|
if (item_node = label_node.parent)
|
97
|
-
data =
|
97
|
+
data = ""
|
98
98
|
item_node.children.each do |n|
|
99
99
|
data += n.to_html if n.text?
|
100
100
|
end
|
101
101
|
data.strip
|
102
102
|
else
|
103
|
-
|
103
|
+
""
|
104
104
|
end
|
105
105
|
end
|
106
106
|
|
107
107
|
def get_book_from_search_result(result)
|
108
108
|
log.debug { "Fetching book from #{result[:lookup_url]}" }
|
109
109
|
html_data = transport.get_response(URI.parse(result[:lookup_url]))
|
110
|
-
parse_result_data(html_data.body,
|
110
|
+
parse_result_data(html_data.body, "noisbn", true)
|
111
111
|
end
|
112
112
|
|
113
113
|
def parse_result_data(html, isbn, recursing = false)
|
114
114
|
doc = html_to_doc(html)
|
115
115
|
|
116
|
-
results_divs = doc /
|
116
|
+
results_divs = doc / "div.articlePresentationSearchCH"
|
117
117
|
unless results_divs.empty?
|
118
118
|
if recursing
|
119
119
|
# already recursing, avoid doing so endlessly second time
|
@@ -130,8 +130,8 @@ module Alexandria
|
|
130
130
|
# e.g. .../dave_thomas/ISBN0-9745140-5-5/ID6017044.html
|
131
131
|
chosen = results.first # fallback!
|
132
132
|
results.each do |rslt|
|
133
|
-
if rslt[:lookup_url] =~
|
134
|
-
if Regexp.last_match[1].delete(
|
133
|
+
if rslt[:lookup_url] =~ %r{/ISBN(\d+[\d-]*)/}
|
134
|
+
if Regexp.last_match[1].delete("-") == isbn10
|
135
135
|
chosen = rslt
|
136
136
|
break
|
137
137
|
end
|
@@ -142,9 +142,9 @@ module Alexandria
|
|
142
142
|
end
|
143
143
|
|
144
144
|
begin
|
145
|
-
if (div = doc %
|
145
|
+
if (div = doc % "div#contentFull")
|
146
146
|
title_img = ((div % :h2) / :img).first
|
147
|
-
title = title_img[
|
147
|
+
title = title_img["alt"]
|
148
148
|
|
149
149
|
# note, the following img also has alt="von Author, Author..."
|
150
150
|
|
@@ -152,7 +152,7 @@ module Alexandria
|
|
152
152
|
authors = []
|
153
153
|
author_links = author_h.parent / :a
|
154
154
|
author_links.each do |a|
|
155
|
-
if a[
|
155
|
+
if a["href"] =~ %r{BUCH/sa}
|
156
156
|
# 'sa' means search author, there may also be 'ssw' (search keyword) links
|
157
157
|
authors << a.inner_text[0..-2].strip
|
158
158
|
# NOTE stripping the little >> character here...
|
@@ -160,25 +160,25 @@ module Alexandria
|
|
160
160
|
end
|
161
161
|
end
|
162
162
|
|
163
|
-
item_details = doc %
|
163
|
+
item_details = doc % "ul.itemDataList"
|
164
164
|
isbns = []
|
165
|
-
isbns << data_from_label(item_details,
|
166
|
-
isbns << data_from_label(item_details,
|
165
|
+
isbns << data_from_label(item_details, "EAN")
|
166
|
+
isbns << data_from_label(item_details, "ISBN")
|
167
167
|
|
168
168
|
year = nil
|
169
|
-
date = data_from_label(item_details,
|
169
|
+
date = data_from_label(item_details, "Erschienen:")
|
170
170
|
year = Regexp.last_match[1].to_i if date =~ /([\d]{4})/
|
171
171
|
|
172
|
-
binding = data_from_label(item_details,
|
172
|
+
binding = data_from_label(item_details, "Einband")
|
173
173
|
|
174
|
-
publisher = data_from_label(item_details,
|
174
|
+
publisher = data_from_label(item_details, "Erschienen bei:")
|
175
175
|
|
176
176
|
book = Book.new(title, authors, isbns.first,
|
177
177
|
publisher, year, binding)
|
178
178
|
|
179
179
|
image_url = nil
|
180
|
-
if (image_link = doc %
|
181
|
-
image_url = image_link[
|
180
|
+
if (image_link = doc % "a[@id=itemPicStart]")
|
181
|
+
image_url = image_link["href"]
|
182
182
|
end
|
183
183
|
|
184
184
|
return [book, image_url]
|
@@ -186,10 +186,10 @@ module Alexandria
|
|
186
186
|
end
|
187
187
|
rescue StandardError => ex
|
188
188
|
trace = ex.backtrace.join("\n> ")
|
189
|
-
log.warn
|
190
|
-
|
189
|
+
log.warn do
|
190
|
+
"Failed parsing search results for Thalia " \
|
191
191
|
"#{ex.message} #{trace}"
|
192
|
-
|
192
|
+
end
|
193
193
|
raise NoResultsError
|
194
194
|
end
|
195
195
|
end
|
@@ -4,8 +4,8 @@
|
|
4
4
|
#
|
5
5
|
# See the file README.md for authorship and licensing information.
|
6
6
|
|
7
|
-
require
|
8
|
-
require
|
7
|
+
require "hpricot"
|
8
|
+
require "htmlentities"
|
9
9
|
|
10
10
|
module Alexandria
|
11
11
|
class BookProviders
|
@@ -15,9 +15,9 @@ module Alexandria
|
|
15
15
|
@htmlentities = HTMLEntities.new
|
16
16
|
end
|
17
17
|
|
18
|
-
def html_to_doc(html, source_data_charset =
|
18
|
+
def html_to_doc(html, source_data_charset = "ISO-8859-1")
|
19
19
|
html.force_encoding source_data_charset
|
20
|
-
utf8_html = html.encode(
|
20
|
+
utf8_html = html.encode("utf-8")
|
21
21
|
normalized_html = @htmlentities.decode(utf8_html)
|
22
22
|
Hpricot(normalized_html)
|
23
23
|
end
|
@@ -33,7 +33,7 @@ module Alexandria
|
|
33
33
|
nil
|
34
34
|
else
|
35
35
|
node_text = node.children.map { |n| text_of(n) }.join
|
36
|
-
node_text.strip.squeeze(
|
36
|
+
node_text.strip.squeeze(" ")
|
37
37
|
end
|
38
38
|
end
|
39
39
|
# node.inner_html.strip
|
@@ -1,24 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
#
|
3
|
+
# This file is part of Alexandria.
|
4
4
|
#
|
5
|
-
#
|
6
|
-
#
|
7
|
-
#
|
8
|
-
# Alexandria is free software; you can redistribute it and/or
|
9
|
-
# modify it under the terms of the GNU General Public License as
|
10
|
-
# published by the Free Software Foundation; either version 2 of the
|
11
|
-
# License, or (at your option) any later version.
|
12
|
-
#
|
13
|
-
# Alexandria is distributed in the hope that it will be useful,
|
14
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
16
|
-
# General Public License for more details.
|
17
|
-
#
|
18
|
-
# You should have received a copy of the GNU General Public
|
19
|
-
# License along with Alexandria; see the file COPYING. If not,
|
20
|
-
# write to the Free Software Foundation, Inc., 51 Franklin Street,
|
21
|
-
# Fifth Floor, Boston, MA 02110-1301 USA.
|
5
|
+
# See the file README.md for authorship and licensing information.
|
6
|
+
# frozen_string_literal: true
|
22
7
|
|
23
8
|
# http://en.wikipedia.org/wiki/WorldCat
|
24
9
|
# See http://www.oclc.org/worldcat/policies/terms/
|
@@ -30,34 +15,27 @@
|
|
30
15
|
# Updated from Palatina, to reflect changes in the worldcat website.
|
31
16
|
# (1 Sep 2009)
|
32
17
|
|
33
|
-
require
|
34
|
-
require
|
35
|
-
require
|
18
|
+
require "cgi"
|
19
|
+
require "alexandria/net"
|
20
|
+
require "alexandria/book_providers/web"
|
36
21
|
|
37
22
|
module Alexandria
|
38
23
|
class BookProviders
|
39
24
|
class WorldCatProvider < WebsiteBasedProvider
|
40
25
|
include Alexandria::Logging
|
41
26
|
|
42
|
-
SITE =
|
27
|
+
SITE = "https://www.worldcat.org"
|
43
28
|
BASE_SEARCH_URL = "#{SITE}/search?q=%s%s&qt=advanced" # type, term
|
44
29
|
|
45
30
|
def initialize
|
46
|
-
super(
|
47
|
-
# prefs.add("enabled", _("Enabled"), true, [true,false])
|
31
|
+
super("WorldCat", "WorldCat")
|
48
32
|
prefs.read
|
49
33
|
end
|
50
34
|
|
51
35
|
def search(criterion, type)
|
52
|
-
# puts create_search_uri(type, criterion)
|
53
36
|
req = create_search_uri(type, criterion)
|
54
|
-
puts req if $DEBUG
|
55
37
|
html_data = transport.get_response(URI.parse(req))
|
56
|
-
# Note: I tried to use Alexandria::WWWAgent,
|
57
|
-
# but this caused failures here (empty pages...)
|
58
|
-
# find out how the requests differ
|
59
38
|
|
60
|
-
# puts html_data.class
|
61
39
|
if type == SEARCH_BY_ISBN
|
62
40
|
parse_result_data(html_data.body, criterion)
|
63
41
|
else
|
@@ -70,18 +48,15 @@ module Alexandria
|
|
70
48
|
|
71
49
|
def url(book)
|
72
50
|
create_search_uri(SEARCH_BY_ISBN, book.isbn)
|
73
|
-
rescue StandardError => ex
|
74
|
-
log.warn { "Cannot create url for book #{book}; #{ex.message}" }
|
75
|
-
nil
|
76
51
|
end
|
77
52
|
|
78
53
|
private
|
79
54
|
|
80
55
|
def create_search_uri(search_type, search_term)
|
81
|
-
(search_type_code = { SEARCH_BY_ISBN =>
|
82
|
-
SEARCH_BY_AUTHORS =>
|
83
|
-
SEARCH_BY_TITLE =>
|
84
|
-
SEARCH_BY_KEYWORD =>
|
56
|
+
(search_type_code = { SEARCH_BY_ISBN => "isbn:",
|
57
|
+
SEARCH_BY_AUTHORS => "au:",
|
58
|
+
SEARCH_BY_TITLE => "ti:",
|
59
|
+
SEARCH_BY_KEYWORD => "" }[search_type]) || ""
|
85
60
|
search_type_code = CGI.escape(search_type_code)
|
86
61
|
search_term_encoded = if search_type == SEARCH_BY_ISBN
|
87
62
|
Library.canonicalise_ean(search_term) # isbn-13
|
@@ -98,19 +73,19 @@ module Alexandria
|
|
98
73
|
end
|
99
74
|
|
100
75
|
def parse_search_result_data(html)
|
101
|
-
doc = html_to_doc(html,
|
76
|
+
doc = html_to_doc(html, "UTF-8")
|
102
77
|
book_search_results = []
|
103
78
|
begin
|
104
|
-
result_cells = doc /
|
79
|
+
result_cells = doc / "td.result/div.name/.."
|
105
80
|
# puts result_cells.length
|
106
81
|
result_cells.each do |td|
|
107
|
-
type_icon = (td %
|
108
|
-
next unless type_icon && type_icon[
|
82
|
+
type_icon = (td % "div.type/img.icn")
|
83
|
+
next unless type_icon && type_icon["src"] =~ /icon-bks/
|
109
84
|
|
110
|
-
name_div = td %
|
85
|
+
name_div = td % "div.name"
|
111
86
|
title = name_div.inner_text
|
112
87
|
anchor = name_div % :a
|
113
|
-
url = anchor[
|
88
|
+
url = anchor["href"] if anchor
|
114
89
|
lookup_url = "#{SITE}#{url}"
|
115
90
|
result = {}
|
116
91
|
result[:title] = title
|
@@ -120,29 +95,29 @@ module Alexandria
|
|
120
95
|
end
|
121
96
|
rescue StandardError => ex
|
122
97
|
trace = ex.backtrace.join("\n> ")
|
123
|
-
log.warn
|
124
|
-
|
98
|
+
log.warn do
|
99
|
+
"Failed parsing search results for WorldCat " \
|
125
100
|
"#{ex.message} #{trace}"
|
126
|
-
|
101
|
+
end
|
127
102
|
end
|
128
103
|
book_search_results
|
129
104
|
end
|
130
105
|
|
131
106
|
def parse_result_data(html, search_isbn = nil, recursing = false)
|
132
|
-
doc = html_to_doc(html,
|
107
|
+
doc = html_to_doc(html, "UTF-8")
|
133
108
|
|
134
109
|
begin
|
135
|
-
if doc %
|
136
|
-
log.debug {
|
110
|
+
if doc % "div#div-results-none"
|
111
|
+
log.debug { "WorldCat reports no results" }
|
137
112
|
raise NoResultsError
|
138
113
|
end
|
139
114
|
|
140
|
-
if doc %
|
115
|
+
if doc % "table.table-results"
|
141
116
|
if recursing
|
142
|
-
log.warn {
|
117
|
+
log.warn { "Infinite loop prevented redirecting through WorldCat" }
|
143
118
|
raise NoResultsError
|
144
119
|
end
|
145
|
-
log.info {
|
120
|
+
log.info { "Found multiple results for lookup: checking each" }
|
146
121
|
search_results = parse_search_result_data(html)
|
147
122
|
book = nil
|
148
123
|
cover_url = nil
|
@@ -165,7 +140,7 @@ module Alexandria
|
|
165
140
|
log.info { "book #{book} is a match" }
|
166
141
|
return [book, cover_url]
|
167
142
|
end
|
168
|
-
log.debug {
|
143
|
+
log.debug { "not a match, checking next" }
|
169
144
|
else
|
170
145
|
# no constraint to match isbn, just return first result
|
171
146
|
return [book, cover_url]
|
@@ -173,21 +148,21 @@ module Alexandria
|
|
173
148
|
end
|
174
149
|
|
175
150
|
# gone through all and no ISBN match, so just return first result
|
176
|
-
log.info {
|
151
|
+
log.info { "no more results to check. Returning first result, just an approximation" }
|
177
152
|
return first_result
|
178
153
|
|
179
154
|
end
|
180
155
|
|
181
|
-
title_header = doc %
|
156
|
+
title_header = doc % "h1.title"
|
182
157
|
title = title_header.inner_text if title_header
|
183
158
|
unless title
|
184
|
-
log.warn {
|
159
|
+
log.warn { "Unexpected lack of title from WorldCat lookup" }
|
185
160
|
raise NoResultsError
|
186
161
|
end
|
187
162
|
log.info { "Found book #{title} at WorldCat" }
|
188
163
|
|
189
164
|
authors = []
|
190
|
-
authors_tr = doc %
|
165
|
+
authors_tr = doc % "tr#details-allauthors"
|
191
166
|
if authors_tr
|
192
167
|
(authors_tr / :a).each do |a|
|
193
168
|
authors << a.inner_text
|
@@ -195,16 +170,16 @@ module Alexandria
|
|
195
170
|
end
|
196
171
|
|
197
172
|
# can we do better? get the City name?? or multiple publishers?
|
198
|
-
bibdata = doc %
|
173
|
+
bibdata = doc % "div#bibdata"
|
199
174
|
bibdata_table = bibdata % :table
|
200
|
-
publisher_row = bibdata_table %
|
175
|
+
publisher_row = bibdata_table % "th[text()*=Publisher]/.."
|
201
176
|
|
202
177
|
if publisher_row
|
203
|
-
publication_info = (publisher_row /
|
178
|
+
publication_info = (publisher_row / "td").last.inner_text
|
204
179
|
|
205
|
-
publication_info =~ if publication_info.index(
|
180
|
+
publication_info =~ if publication_info.index(";")
|
206
181
|
/;[\s]*([^\d]+)[\s]*[\d]*/
|
207
|
-
elsif publication_info.index(
|
182
|
+
elsif publication_info.index(":")
|
208
183
|
/:[\s]*([^;:,]+)/
|
209
184
|
else
|
210
185
|
/([^;,]+)/
|
@@ -220,16 +195,16 @@ module Alexandria
|
|
220
195
|
|
221
196
|
isbn = search_isbn
|
222
197
|
unless isbn
|
223
|
-
isbn_row = doc %
|
198
|
+
isbn_row = doc % "tr#details-standardno" # #bibdata_table % 'th[text()*=ISBN]/..'
|
224
199
|
if isbn_row
|
225
|
-
isbns = (isbn_row /
|
200
|
+
isbns = (isbn_row / "td").last.inner_text.split
|
226
201
|
isbn = Library.canonicalise_isbn(isbns.first)
|
227
202
|
else
|
228
|
-
log.warn {
|
203
|
+
log.warn { "No ISBN found on page" }
|
229
204
|
end
|
230
205
|
end
|
231
206
|
|
232
|
-
binding =
|
207
|
+
binding = "" # not given on WorldCat website (as far as I can tell)
|
233
208
|
|
234
209
|
book = Book.new(title, authors, isbn, publisher, year, binding)
|
235
210
|
|
@@ -240,10 +215,10 @@ module Alexandria
|
|
240
215
|
raise ex if ex.instance_of? NoResultsError
|
241
216
|
|
242
217
|
trace = ex.backtrace.join("\n> ")
|
243
|
-
log.warn
|
244
|
-
|
218
|
+
log.warn do
|
219
|
+
"Failed parsing search results for WorldCat " \
|
245
220
|
"#{ex.message} #{trace}"
|
246
|
-
|
221
|
+
end
|
247
222
|
raise NoResultsError
|
248
223
|
end
|
249
224
|
end
|