alexandria-book-collection-manager 0.7.1 → 0.7.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/dependabot.yml +9 -0
- data/.gitignore +5 -2
- data/.hound.yml +2 -0
- data/.rubocop.yml +113 -45
- data/.rubocop_todo.yml +82 -170
- data/.simplecov +5 -1
- data/.travis.yml +45 -0
- data/.yardopts +1 -1
- data/CHANGELOG.md +60 -0
- data/ChangeLog.0 +33 -35
- data/Gemfile +6 -5
- data/INSTALL.md +164 -0
- data/README.md +52 -42
- data/Rakefile +95 -109
- data/TODO.md +9 -1
- data/alexandria-book-collection-manager.gemspec +52 -45
- data/bin/alexandria +31 -53
- data/doc/AUTHORS +61 -0
- data/doc/BUGS +31 -0
- data/doc/FAQ +365 -0
- data/doc/HACKING +19 -0
- data/doc/NEWS +341 -0
- data/doc/alexandria.1 +120 -0
- data/doc/cuecat_support.rdoc +67 -0
- data/doc/dependency_decisions.yml +80 -0
- data/lib/alexandria.rb +29 -37
- data/lib/alexandria/about.rb +52 -51
- data/lib/alexandria/book_providers.rb +94 -101
- data/lib/alexandria/book_providers/adlibris.rb +45 -85
- data/lib/alexandria/book_providers/amazon_aws.rb +105 -113
- data/lib/alexandria/book_providers/amazon_ecs_util.rb +293 -324
- data/lib/alexandria/book_providers/barnes_and_noble.rb +54 -53
- data/lib/alexandria/book_providers/douban.rb +29 -51
- data/lib/alexandria/book_providers/proxis.rb +42 -59
- data/lib/alexandria/book_providers/pseudomarc.rb +79 -99
- data/lib/alexandria/book_providers/siciliano.rb +68 -70
- data/lib/alexandria/book_providers/thalia.rb +46 -45
- data/lib/alexandria/book_providers/web.rb +17 -33
- data/lib/alexandria/book_providers/worldcat.rb +74 -102
- data/lib/alexandria/book_providers/z3950.rb +170 -174
- data/lib/alexandria/config.rb +5 -3
- data/lib/alexandria/console.rb +10 -21
- data/lib/alexandria/default_preferences.rb +37 -0
- data/lib/alexandria/execution_queue.rb +17 -15
- data/lib/alexandria/export_format.rb +47 -0
- data/lib/alexandria/export_library.rb +188 -302
- data/lib/alexandria/import_library.rb +114 -155
- data/lib/alexandria/import_library_csv.rb +46 -96
- data/lib/alexandria/library_collection.rb +79 -0
- data/lib/alexandria/library_sort_order.rb +45 -0
- data/lib/alexandria/library_store.rb +233 -0
- data/lib/alexandria/logging.rb +15 -19
- data/lib/alexandria/models/book.rb +15 -20
- data/lib/alexandria/models/library.rb +81 -363
- data/lib/alexandria/net.rb +7 -6
- data/lib/alexandria/preferences.rb +73 -91
- data/lib/alexandria/scanners.rb +4 -2
- data/lib/alexandria/scanners/{cuecat.rb → cue_cat.rb} +24 -20
- data/lib/alexandria/scanners/keyboard.rb +10 -8
- data/lib/alexandria/smart_library.rb +135 -171
- data/lib/alexandria/ui.rb +17 -15
- data/lib/alexandria/ui/about_dialog.rb +49 -0
- data/lib/alexandria/ui/{dialogs/acquire_dialog.rb → acquire_dialog.rb} +129 -152
- data/lib/alexandria/ui/alert_dialog.rb +64 -0
- data/lib/alexandria/ui/bad_isbns_dialog.rb +41 -0
- data/lib/alexandria/ui/{dialogs/barcode_animation.rb → barcode_animation.rb} +18 -15
- data/lib/alexandria/ui/{dialogs/book_properties_dialog.rb → book_properties_dialog.rb} +44 -61
- data/lib/alexandria/ui/{dialogs/book_properties_dialog_base.rb → book_properties_dialog_base.rb} +84 -89
- data/lib/alexandria/ui/builder_base.rb +9 -27
- data/lib/alexandria/ui/callbacks.rb +188 -186
- data/lib/alexandria/ui/columns.rb +2 -0
- data/lib/alexandria/ui/completion_models.rb +12 -23
- data/lib/alexandria/ui/confirm_erase_dialog.rb +33 -0
- data/lib/alexandria/ui/conflict_while_copying_dialog.rb +34 -0
- data/lib/alexandria/ui/dndable.rb +10 -8
- data/lib/alexandria/ui/error_dialog.rb +25 -0
- data/lib/alexandria/ui/export_dialog.rb +139 -0
- data/lib/alexandria/ui/icons.rb +49 -65
- data/lib/alexandria/ui/iconview.rb +15 -13
- data/lib/alexandria/ui/iconview_tooltips.rb +43 -58
- data/lib/alexandria/ui/import_dialog.rb +157 -0
- data/lib/alexandria/ui/init.rb +23 -33
- data/lib/alexandria/ui/keep_bad_isbn_dialog.rb +36 -0
- data/lib/alexandria/ui/libraries_combo.rb +18 -14
- data/lib/alexandria/ui/listview.rb +77 -88
- data/lib/alexandria/ui/main_app.rb +26 -26
- data/lib/alexandria/ui/misc_dialogs.rb +10 -0
- data/lib/alexandria/ui/multi_drag_treeview.rb +30 -41
- data/lib/alexandria/ui/{dialogs/new_book_dialog.rb → new_book_dialog.rb} +168 -215
- data/lib/alexandria/ui/new_book_dialog_manual.rb +139 -0
- data/lib/alexandria/ui/new_provider_dialog.rb +100 -0
- data/lib/alexandria/ui/new_smart_library_dialog.rb +74 -0
- data/lib/alexandria/ui/preferences_dialog.rb +313 -0
- data/lib/alexandria/ui/provider_preferences_base_dialog.rb +95 -0
- data/lib/alexandria/ui/provider_preferences_dialog.rb +35 -0
- data/lib/alexandria/ui/really_delete_dialog.rb +53 -0
- data/lib/alexandria/ui/{sidepane.rb → sidepane_manager.rb} +62 -72
- data/lib/alexandria/ui/skip_entry_dialog.rb +33 -0
- data/lib/alexandria/ui/smart_library_properties_dialog.rb +60 -0
- data/lib/alexandria/ui/{dialogs/smart_library_properties_dialog_base.rb → smart_library_properties_dialog_base.rb} +96 -172
- data/lib/alexandria/ui/smart_library_rule_box.rb +119 -0
- data/lib/alexandria/ui/sound.rb +13 -13
- data/lib/alexandria/ui/ui_manager.rb +262 -283
- data/lib/alexandria/undo_manager.rb +3 -0
- data/lib/alexandria/version.rb +6 -19
- data/lib/alexandria/web_themes.rb +24 -21
- data/po/Makefile +2 -2
- data/po/cs.po +993 -880
- data/po/cy.po +957 -874
- data/po/de.po +990 -869
- data/po/el.po +989 -869
- data/po/es.po +985 -865
- data/po/fr.po +986 -870
- data/po/ga.po +907 -823
- data/po/gl.po +981 -865
- data/po/it.po +986 -868
- data/po/ja.po +969 -853
- data/po/mk.po +983 -863
- data/po/nb.po +979 -863
- data/po/nl.po +983 -864
- data/po/pl.po +1020 -969
- data/po/pt.po +988 -861
- data/po/pt_BR.po +984 -868
- data/po/ru.po +992 -873
- data/po/sk.po +987 -869
- data/po/sv.po +977 -861
- data/po/uk.po +975 -865
- data/po/zh_TW.po +976 -860
- data/schemas/alexandria.schemas +25 -3
- data/share/alexandria/glade/acquire_dialog__builder.glade +15 -12
- data/share/alexandria/glade/book_properties_dialog__builder.glade +171 -299
- data/share/alexandria/glade/main_app__builder.glade +24 -33
- data/share/alexandria/glade/new_book_dialog__builder.glade +27 -59
- data/share/alexandria/glade/preferences_dialog__builder.glade +250 -290
- data/share/gnome/help/alexandria/C/introduction.xml +0 -8
- data/share/gnome/help/alexandria/C/searching.xml +1 -1
- data/share/gnome/help/alexandria/C/smart-libraries.xml +2 -2
- data/share/gnome/help/alexandria/C/working-with-libraries.xml +1 -1
- data/share/gnome/help/alexandria/fr/alexandria.xml +1 -1
- data/share/gnome/help/alexandria/ja/introduction.xml +0 -8
- data/share/gnome/help/alexandria/ja/smart-libraries.xml +1 -1
- data/spec/alexandria/book_providers/world_cat_provider_spec.rb +160 -0
- data/spec/alexandria/book_providers_spec.rb +77 -210
- data/spec/alexandria/book_spec.rb +16 -12
- data/spec/alexandria/console_spec.rb +27 -0
- data/spec/alexandria/export_library_spec.rb +130 -0
- data/spec/alexandria/library_spec.rb +130 -172
- data/spec/alexandria/library_store_spec.rb +37 -0
- data/spec/alexandria/preferences_spec.rb +46 -17
- data/spec/alexandria/scanners/cue_cat_spec.rb +52 -0
- data/spec/alexandria/smart_library_spec.rb +32 -25
- data/spec/alexandria/ui/about_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/acquire_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/alert_dialog_spec.rb +16 -0
- data/spec/alexandria/ui/bad_isbns_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/book_properties_dialog_spec.rb +17 -0
- data/spec/alexandria/ui/confirm_erase_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/conflict_while_copying_dialog_spec.rb +16 -0
- data/spec/alexandria/ui/error_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/export_dialog_spec.rb +15 -0
- data/spec/alexandria/ui/icons_spec.rb +26 -0
- data/spec/alexandria/ui/iconview_spec.rb +9 -21
- data/spec/alexandria/ui/import_dialog_spec.rb +41 -0
- data/spec/alexandria/ui/keep_bad_isbn_dialog_spec.rb +17 -0
- data/spec/alexandria/ui/main_app_spec.rb +8 -33
- data/spec/alexandria/ui/new_book_dialog_manual_spec.rb +15 -0
- data/spec/alexandria/ui/new_book_dialog_spec.rb +22 -0
- data/spec/alexandria/ui/new_provider_dialog_spec.rb +30 -0
- data/spec/alexandria/ui/new_smart_library_dialog_spec.rb +39 -0
- data/spec/alexandria/ui/preferences_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/provider_preferences_dialog_spec.rb +34 -0
- data/spec/alexandria/ui/really_delete_dialog_spec.rb +16 -0
- data/spec/alexandria/ui/sidepane_manager_spec.rb +15 -0
- data/spec/alexandria/ui/skip_entry_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/smart_library_properties_dialog_spec.rb +32 -0
- data/spec/alexandria/ui/sound_spec.rb +4 -2
- data/spec/alexandria/ui/ui_manager_spec.rb +45 -20
- data/spec/end_to_end/basic_run_spec.rb +57 -0
- data/spec/spec_helper.rb +66 -33
- data/tasks/setup.rb +5 -3
- data/tasks/spec.rake +18 -3
- data/util/rake/fileinstall.rb +38 -40
- data/util/rake/gettextgenerate.rb +15 -70
- data/util/rake/omfgenerate.rb +10 -10
- metadata +176 -60
- data/INSTALL.rdoc +0 -148
- data/dogtail/basic_run_test.py +0 -9
- data/lib/alexandria/book_providers/bol_it.rb +0 -160
- data/lib/alexandria/book_providers/deastore.rb +0 -273
- data/lib/alexandria/book_providers/ibs_it.rb +0 -147
- data/lib/alexandria/book_providers/mcu.rb +0 -169
- data/lib/alexandria/book_providers/renaud.rb +0 -140
- data/lib/alexandria/book_providers/webster_it.rb +0 -167
- data/lib/alexandria/ui/dialogs/about_dialog.rb +0 -59
- data/lib/alexandria/ui/dialogs/alert_dialog.rb +0 -70
- data/lib/alexandria/ui/dialogs/bad_isbns_dialog.rb +0 -43
- data/lib/alexandria/ui/dialogs/export_dialog.rb +0 -171
- data/lib/alexandria/ui/dialogs/import_dialog.rb +0 -196
- data/lib/alexandria/ui/dialogs/misc_dialogs.rb +0 -85
- data/lib/alexandria/ui/dialogs/new_book_dialog_manual.rb +0 -154
- data/lib/alexandria/ui/dialogs/new_smart_library_dialog.rb +0 -74
- data/lib/alexandria/ui/dialogs/preferences_dialog.rb +0 -578
- data/lib/alexandria/ui/dialogs/smart_library_properties_dialog.rb +0 -57
- data/spec/alexandria/scanners/cuecat_spec.rb +0 -65
- data/spec/alexandria/ui/dialogs_spec.rb +0 -94
- data/spec/alexandria/ui/sidepane_spec.rb +0 -27
- data/spec/alexandria/ui/ui_utilities_spec.rb +0 -60
- data/spec/alexandria/utilities_spec.rb +0 -50
- data/tasks/dogtail.rake +0 -4
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Copyright (C) 2009 Cathal Mc Ginley
|
2
4
|
# Copyright (C) 2014 Matijs van Zuijlen
|
3
5
|
#
|
@@ -22,20 +24,20 @@
|
|
22
24
|
# New Tlalia provider, taken from Palatina MetaDataSource and modified
|
23
25
|
# for Alexandria. (21 Dec 2009)
|
24
26
|
|
25
|
-
require
|
26
|
-
require
|
27
|
-
require
|
27
|
+
require "net/http"
|
28
|
+
require "cgi"
|
29
|
+
require "alexandria/book_providers/web"
|
28
30
|
|
29
31
|
module Alexandria
|
30
32
|
class BookProviders
|
31
33
|
class ThaliaProvider < WebsiteBasedProvider
|
32
|
-
include
|
34
|
+
include Logging
|
33
35
|
|
34
|
-
SITE =
|
35
|
-
BASE_SEARCH_URL = "#{SITE}/shop/bde_bu_hg_startseite/suche/?%s=%s"
|
36
|
+
SITE = "http://www.thalia.de"
|
37
|
+
BASE_SEARCH_URL = "#{SITE}/shop/bde_bu_hg_startseite/suche/?%s=%s" # type,term
|
36
38
|
|
37
39
|
def initialize
|
38
|
-
super(
|
40
|
+
super("Thalia", "Thalia (Germany)")
|
39
41
|
# no preferences for the moment
|
40
42
|
prefs.read
|
41
43
|
end
|
@@ -46,24 +48,25 @@ module Alexandria
|
|
46
48
|
|
47
49
|
def search(criterion, type)
|
48
50
|
req = create_search_uri(type, criterion)
|
49
|
-
|
51
|
+
log.debug { req }
|
50
52
|
html_data = transport.get_response(URI.parse(req))
|
51
53
|
if type == SEARCH_BY_ISBN
|
52
54
|
parse_result_data(html_data.body, criterion)
|
53
55
|
else
|
54
56
|
results = parse_search_result_data(html_data.body)
|
55
57
|
raise NoResultsError if results.empty?
|
58
|
+
|
56
59
|
results.map { |result| get_book_from_search_result(result) }
|
57
60
|
end
|
58
61
|
end
|
59
62
|
|
60
63
|
def create_search_uri(search_type, search_term)
|
61
64
|
(search_type_code = {
|
62
|
-
SEARCH_BY_ISBN
|
63
|
-
SEARCH_BY_AUTHORS =>
|
64
|
-
SEARCH_BY_TITLE
|
65
|
-
SEARCH_BY_KEYWORD =>
|
66
|
-
}[search_type]) ||
|
65
|
+
SEARCH_BY_ISBN => "sq",
|
66
|
+
SEARCH_BY_AUTHORS => "sa", # Autor
|
67
|
+
SEARCH_BY_TITLE => "st", # Titel
|
68
|
+
SEARCH_BY_KEYWORD => "ssw" # Schlagwort
|
69
|
+
}[search_type]) || ""
|
67
70
|
search_type_code = CGI.escape(search_type_code)
|
68
71
|
search_term_encoded = if search_type == SEARCH_BY_ISBN
|
69
72
|
# search_term_encoded = search_term.as_isbn_13
|
@@ -71,18 +74,18 @@ module Alexandria
|
|
71
74
|
else
|
72
75
|
CGI.escape(search_term)
|
73
76
|
end
|
74
|
-
BASE_SEARCH_URL
|
77
|
+
format(BASE_SEARCH_URL, search_type_code, search_term_encoded)
|
75
78
|
end
|
76
79
|
|
77
80
|
def parse_search_result_data(html)
|
78
81
|
doc = html_to_doc(html)
|
79
82
|
book_search_results = []
|
80
|
-
results_divs = doc /
|
83
|
+
results_divs = doc / "div.articlePresentationSearchCH"
|
81
84
|
results_divs.each do |div|
|
82
85
|
result = {}
|
83
|
-
title_link = div %
|
86
|
+
title_link = div % "div.articleText/h2/a"
|
84
87
|
result[:title] = title_link.inner_html
|
85
|
-
result[:lookup_url] = title_link[
|
88
|
+
result[:lookup_url] = title_link["href"]
|
86
89
|
book_search_results << result
|
87
90
|
end
|
88
91
|
book_search_results
|
@@ -91,26 +94,26 @@ module Alexandria
|
|
91
94
|
def data_from_label(node, label_text)
|
92
95
|
label_node = node % "strong[text()*='#{label_text}']"
|
93
96
|
if (item_node = label_node.parent)
|
94
|
-
data =
|
97
|
+
data = ""
|
95
98
|
item_node.children.each do |n|
|
96
99
|
data += n.to_html if n.text?
|
97
100
|
end
|
98
101
|
data.strip
|
99
102
|
else
|
100
|
-
|
103
|
+
""
|
101
104
|
end
|
102
105
|
end
|
103
106
|
|
104
107
|
def get_book_from_search_result(result)
|
105
108
|
log.debug { "Fetching book from #{result[:lookup_url]}" }
|
106
109
|
html_data = transport.get_response(URI.parse(result[:lookup_url]))
|
107
|
-
parse_result_data(html_data.body,
|
110
|
+
parse_result_data(html_data.body, "noisbn", true)
|
108
111
|
end
|
109
112
|
|
110
113
|
def parse_result_data(html, isbn, recursing = false)
|
111
114
|
doc = html_to_doc(html)
|
112
115
|
|
113
|
-
results_divs = doc /
|
116
|
+
results_divs = doc / "div.articlePresentationSearchCH"
|
114
117
|
unless results_divs.empty?
|
115
118
|
if recursing
|
116
119
|
# already recursing, avoid doing so endlessly second time
|
@@ -118,6 +121,7 @@ module Alexandria
|
|
118
121
|
# list
|
119
122
|
return
|
120
123
|
end
|
124
|
+
|
121
125
|
# ISBN-lookup results in multiple results (trying to be
|
122
126
|
# useful, such as for new editions e.g. 9780974514055
|
123
127
|
# "Programming Ruby" )
|
@@ -126,11 +130,10 @@ module Alexandria
|
|
126
130
|
# e.g. .../dave_thomas/ISBN0-9745140-5-5/ID6017044.html
|
127
131
|
chosen = results.first # fallback!
|
128
132
|
results.each do |rslt|
|
129
|
-
if rslt[:lookup_url] =~
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
end
|
133
|
+
if rslt[:lookup_url] =~ %r{/ISBN(\d+[\d-]*)/} &&
|
134
|
+
(Regexp.last_match[1].delete("-") == isbn10)
|
135
|
+
chosen = rslt
|
136
|
+
break
|
134
137
|
end
|
135
138
|
end
|
136
139
|
html_data = transport.get_response(URI.parse(chosen[:lookup_url]))
|
@@ -138,9 +141,9 @@ module Alexandria
|
|
138
141
|
end
|
139
142
|
|
140
143
|
begin
|
141
|
-
if (div = doc %
|
144
|
+
if (div = doc % "div#contentFull")
|
142
145
|
title_img = ((div % :h2) / :img).first
|
143
|
-
title = title_img[
|
146
|
+
title = title_img["alt"]
|
144
147
|
|
145
148
|
# note, the following img also has alt="von Author, Author..."
|
146
149
|
|
@@ -148,7 +151,7 @@ module Alexandria
|
|
148
151
|
authors = []
|
149
152
|
author_links = author_h.parent / :a
|
150
153
|
author_links.each do |a|
|
151
|
-
if a[
|
154
|
+
if a["href"].include? "BUCH/sa"
|
152
155
|
# 'sa' means search author, there may also be 'ssw' (search keyword) links
|
153
156
|
authors << a.inner_text[0..-2].strip
|
154
157
|
# NOTE stripping the little >> character here...
|
@@ -156,38 +159,36 @@ module Alexandria
|
|
156
159
|
end
|
157
160
|
end
|
158
161
|
|
159
|
-
item_details = doc %
|
162
|
+
item_details = doc % "ul.itemDataList"
|
160
163
|
isbns = []
|
161
|
-
isbns << data_from_label(item_details,
|
162
|
-
isbns << data_from_label(item_details,
|
164
|
+
isbns << data_from_label(item_details, "EAN")
|
165
|
+
isbns << data_from_label(item_details, "ISBN")
|
163
166
|
|
164
167
|
year = nil
|
165
|
-
date = data_from_label(item_details,
|
166
|
-
if date =~ /(
|
167
|
-
year = Regexp.last_match[1].to_i
|
168
|
-
end
|
168
|
+
date = data_from_label(item_details, "Erschienen:")
|
169
|
+
year = Regexp.last_match[1].to_i if date =~ /(\d{4})/
|
169
170
|
|
170
|
-
binding = data_from_label(item_details,
|
171
|
+
binding = data_from_label(item_details, "Einband")
|
171
172
|
|
172
|
-
publisher = data_from_label(item_details,
|
173
|
+
publisher = data_from_label(item_details, "Erschienen bei:")
|
173
174
|
|
174
175
|
book = Book.new(title, authors, isbns.first,
|
175
176
|
publisher, year, binding)
|
176
177
|
|
177
178
|
image_url = nil
|
178
|
-
if (image_link = doc %
|
179
|
-
image_url = image_link[
|
179
|
+
if (image_link = doc % "a[@id=itemPicStart]")
|
180
|
+
image_url = image_link["href"]
|
180
181
|
end
|
181
182
|
|
182
|
-
|
183
|
+
[book, image_url]
|
183
184
|
|
184
185
|
end
|
185
|
-
rescue => ex
|
186
|
+
rescue StandardError => ex
|
186
187
|
trace = ex.backtrace.join("\n> ")
|
187
|
-
log.warn
|
188
|
-
|
188
|
+
log.warn do
|
189
|
+
"Failed parsing search results for Thalia " \
|
189
190
|
"#{ex.message} #{trace}"
|
190
|
-
|
191
|
+
end
|
191
192
|
raise NoResultsError
|
192
193
|
end
|
193
194
|
end
|
@@ -1,25 +1,11 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
#
|
4
|
-
# Copyright (C) 2014 Matijs van Zuijlen
|
5
|
-
#
|
6
|
-
# Alexandria is free software; you can redistribute it and/or
|
7
|
-
# modify it under the terms of the GNU General Public License as
|
8
|
-
# published by the Free Software Foundation; either version 2 of the
|
9
|
-
# License, or (at your option) any later version.
|
10
|
-
#
|
11
|
-
# Alexandria is distributed in the hope that it will be useful,
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
-
# General Public License for more details.
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This file is part of Alexandria.
|
15
4
|
#
|
16
|
-
#
|
17
|
-
# License along with Alexandria; see the file COPYING. If not,
|
18
|
-
# write to the Free Software Foundation, Inc., 51 Franklin Street,
|
19
|
-
# Fifth Floor, Boston, MA 02110-1301 USA.
|
5
|
+
# See the file README.md for authorship and licensing information.
|
20
6
|
|
21
|
-
require
|
22
|
-
require
|
7
|
+
require "hpricot"
|
8
|
+
require "htmlentities"
|
23
9
|
|
24
10
|
module Alexandria
|
25
11
|
class BookProviders
|
@@ -29,9 +15,9 @@ module Alexandria
|
|
29
15
|
@htmlentities = HTMLEntities.new
|
30
16
|
end
|
31
17
|
|
32
|
-
def html_to_doc(html, source_data_charset =
|
18
|
+
def html_to_doc(html, source_data_charset = "ISO-8859-1")
|
33
19
|
html.force_encoding source_data_charset
|
34
|
-
utf8_html = html.encode(
|
20
|
+
utf8_html = html.encode("utf-8")
|
35
21
|
normalized_html = @htmlentities.decode(utf8_html)
|
36
22
|
Hpricot(normalized_html)
|
37
23
|
end
|
@@ -40,19 +26,17 @@ module Alexandria
|
|
40
26
|
def text_of(node)
|
41
27
|
if node.nil?
|
42
28
|
nil
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
node_text.strip.squeeze(' ')
|
52
|
-
end
|
29
|
+
elsif node.text?
|
30
|
+
node.to_html
|
31
|
+
elsif node.elem?
|
32
|
+
if node.children.nil?
|
33
|
+
nil
|
34
|
+
else
|
35
|
+
node_text = node.children.map { |n| text_of(n) }.join
|
36
|
+
node_text.strip.squeeze(" ")
|
53
37
|
end
|
54
|
-
# node.inner_html.strip
|
55
38
|
end
|
39
|
+
# node.inner_html.strip
|
56
40
|
end
|
57
41
|
end
|
58
42
|
end
|
@@ -1,22 +1,9 @@
|
|
1
|
-
#
|
2
|
-
|
3
|
-
#
|
4
|
-
# Copyright (C) 2011, 2014 Matijs van Zuijlen
|
5
|
-
#
|
6
|
-
# Alexandria is free software; you can redistribute it and/or
|
7
|
-
# modify it under the terms of the GNU General Public License as
|
8
|
-
# published by the Free Software Foundation; either version 2 of the
|
9
|
-
# License, or (at your option) any later version.
|
10
|
-
#
|
11
|
-
# Alexandria is distributed in the hope that it will be useful,
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
-
# General Public License for more details.
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This file is part of Alexandria.
|
15
4
|
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
# write to the Free Software Foundation, Inc., 51 Franklin Street,
|
19
|
-
# Fifth Floor, Boston, MA 02110-1301 USA.
|
5
|
+
# See the file README.md for authorship and licensing information.
|
6
|
+
# frozen_string_literal: true
|
20
7
|
|
21
8
|
# http://en.wikipedia.org/wiki/WorldCat
|
22
9
|
# See http://www.oclc.org/worldcat/policies/terms/
|
@@ -28,34 +15,27 @@
|
|
28
15
|
# Updated from Palatina, to reflect changes in the worldcat website.
|
29
16
|
# (1 Sep 2009)
|
30
17
|
|
31
|
-
require
|
32
|
-
require
|
33
|
-
require
|
18
|
+
require "cgi"
|
19
|
+
require "alexandria/net"
|
20
|
+
require "alexandria/book_providers/web"
|
34
21
|
|
35
22
|
module Alexandria
|
36
23
|
class BookProviders
|
37
24
|
class WorldCatProvider < WebsiteBasedProvider
|
38
|
-
include
|
25
|
+
include Logging
|
39
26
|
|
40
|
-
SITE =
|
41
|
-
BASE_SEARCH_URL = "#{SITE}/search?q=%s%s&qt=advanced"
|
27
|
+
SITE = "https://www.worldcat.org"
|
28
|
+
BASE_SEARCH_URL = "#{SITE}/search?q=%s%s&qt=advanced" # type, term
|
42
29
|
|
43
30
|
def initialize
|
44
|
-
super(
|
45
|
-
# prefs.add("enabled", _("Enabled"), true, [true,false])
|
31
|
+
super("WorldCat", "WorldCat")
|
46
32
|
prefs.read
|
47
33
|
end
|
48
34
|
|
49
35
|
def search(criterion, type)
|
50
|
-
# puts create_search_uri(type, criterion)
|
51
36
|
req = create_search_uri(type, criterion)
|
52
|
-
puts req if $DEBUG
|
53
37
|
html_data = transport.get_response(URI.parse(req))
|
54
|
-
# Note: I tried to use Alexandria::WWWAgent,
|
55
|
-
# but this caused failures here (empty pages...)
|
56
|
-
# find out how the requests differ
|
57
38
|
|
58
|
-
# puts html_data.class
|
59
39
|
if type == SEARCH_BY_ISBN
|
60
40
|
parse_result_data(html_data.body, criterion)
|
61
41
|
else
|
@@ -68,25 +48,22 @@ module Alexandria
|
|
68
48
|
|
69
49
|
def url(book)
|
70
50
|
create_search_uri(SEARCH_BY_ISBN, book.isbn)
|
71
|
-
rescue => ex
|
72
|
-
log.warn { "Cannot create url for book #{book}; #{ex.message}" }
|
73
|
-
nil
|
74
51
|
end
|
75
52
|
|
76
53
|
private
|
77
54
|
|
78
55
|
def create_search_uri(search_type, search_term)
|
79
|
-
(search_type_code = { SEARCH_BY_ISBN
|
80
|
-
SEARCH_BY_AUTHORS =>
|
81
|
-
SEARCH_BY_TITLE
|
82
|
-
SEARCH_BY_KEYWORD =>
|
56
|
+
(search_type_code = { SEARCH_BY_ISBN => "isbn:",
|
57
|
+
SEARCH_BY_AUTHORS => "au:",
|
58
|
+
SEARCH_BY_TITLE => "ti:",
|
59
|
+
SEARCH_BY_KEYWORD => "" }[search_type]) || ""
|
83
60
|
search_type_code = CGI.escape(search_type_code)
|
84
61
|
search_term_encoded = if search_type == SEARCH_BY_ISBN
|
85
62
|
Library.canonicalise_ean(search_term) # isbn-13
|
86
63
|
else
|
87
64
|
CGI.escape(search_term)
|
88
65
|
end
|
89
|
-
BASE_SEARCH_URL
|
66
|
+
format(BASE_SEARCH_URL, search_type_code, search_term_encoded)
|
90
67
|
end
|
91
68
|
|
92
69
|
def get_book_from_search_result(result)
|
@@ -96,18 +73,18 @@ module Alexandria
|
|
96
73
|
end
|
97
74
|
|
98
75
|
def parse_search_result_data(html)
|
99
|
-
doc = html_to_doc(html,
|
76
|
+
doc = html_to_doc(html, "UTF-8")
|
100
77
|
book_search_results = []
|
101
78
|
begin
|
102
|
-
result_cells = doc /
|
103
|
-
# puts result_cells.length
|
79
|
+
result_cells = doc / "td.result/div.name/.."
|
104
80
|
result_cells.each do |td|
|
105
|
-
type_icon = (td %
|
106
|
-
next unless type_icon && type_icon[
|
107
|
-
|
81
|
+
type_icon = (td % "div.type/img.icn")
|
82
|
+
next unless type_icon && type_icon["src"].include?("icon-bks")
|
83
|
+
|
84
|
+
name_div = td % "div.name"
|
108
85
|
title = name_div.inner_text
|
109
86
|
anchor = name_div % :a
|
110
|
-
url = anchor[
|
87
|
+
url = anchor["href"] if anchor
|
111
88
|
lookup_url = "#{SITE}#{url}"
|
112
89
|
result = {}
|
113
90
|
result[:title] = title
|
@@ -115,31 +92,31 @@ module Alexandria
|
|
115
92
|
|
116
93
|
book_search_results << result
|
117
94
|
end
|
118
|
-
rescue => ex
|
95
|
+
rescue StandardError => ex
|
119
96
|
trace = ex.backtrace.join("\n> ")
|
120
|
-
log.warn
|
121
|
-
|
97
|
+
log.warn do
|
98
|
+
"Failed parsing search results for WorldCat " \
|
122
99
|
"#{ex.message} #{trace}"
|
123
|
-
|
100
|
+
end
|
124
101
|
end
|
125
102
|
book_search_results
|
126
103
|
end
|
127
104
|
|
128
105
|
def parse_result_data(html, search_isbn = nil, recursing = false)
|
129
|
-
doc = html_to_doc(html,
|
106
|
+
doc = html_to_doc(html, "UTF-8")
|
130
107
|
|
131
108
|
begin
|
132
|
-
if doc %
|
133
|
-
log.debug {
|
109
|
+
if doc % "div#div-results-none"
|
110
|
+
log.debug { "WorldCat reports no results" }
|
134
111
|
raise NoResultsError
|
135
112
|
end
|
136
113
|
|
137
|
-
if doc %
|
114
|
+
if doc % "table.table-results"
|
138
115
|
if recursing
|
139
|
-
log.warn {
|
116
|
+
log.warn { "Infinite loop prevented redirecting through WorldCat" }
|
140
117
|
raise NoResultsError
|
141
118
|
end
|
142
|
-
log.info {
|
119
|
+
log.info { "Found multiple results for lookup: checking each" }
|
143
120
|
search_results = parse_search_result_data(html)
|
144
121
|
book = nil
|
145
122
|
cover_url = nil
|
@@ -151,42 +128,39 @@ module Alexandria
|
|
151
128
|
html2 = rslt2.body
|
152
129
|
|
153
130
|
book, cover_url = parse_result_data(html2, search_isbn, true)
|
154
|
-
if first_result.nil?
|
155
|
-
first_result = [book, cover_url]
|
156
|
-
end
|
157
131
|
|
158
132
|
log.debug { "got book #{book}" }
|
159
133
|
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
log.
|
168
|
-
else
|
169
|
-
# no constraint to match isbn, just return first result
|
134
|
+
return [book, cover_url] unless search_isbn
|
135
|
+
|
136
|
+
first_result = [book, cover_url] if first_result.nil?
|
137
|
+
|
138
|
+
search_isbn_canon = Library.canonicalise_ean(search_isbn)
|
139
|
+
rslt_isbn_canon = Library.canonicalise_ean(book.isbn)
|
140
|
+
if search_isbn_canon == rslt_isbn_canon
|
141
|
+
log.info { "book #{book} is a match" }
|
170
142
|
return [book, cover_url]
|
171
143
|
end
|
144
|
+
log.debug { "not a match, checking next" }
|
172
145
|
end
|
173
146
|
|
174
147
|
# gone through all and no ISBN match, so just return first result
|
175
|
-
log.info
|
148
|
+
log.info do
|
149
|
+
"no more results to check. Returning first result, just an approximation"
|
150
|
+
end
|
176
151
|
return first_result
|
177
|
-
|
178
152
|
end
|
179
153
|
|
180
|
-
title_header = doc %
|
154
|
+
title_header = doc % "h1.title"
|
181
155
|
title = title_header.inner_text if title_header
|
182
156
|
unless title
|
183
|
-
log.warn {
|
157
|
+
log.warn { "Unexpected lack of title from WorldCat lookup" }
|
184
158
|
raise NoResultsError
|
185
159
|
end
|
186
160
|
log.info { "Found book #{title} at WorldCat" }
|
187
161
|
|
188
162
|
authors = []
|
189
|
-
authors_tr = doc %
|
163
|
+
authors_tr = doc % "tr#details-allauthors"
|
190
164
|
if authors_tr
|
191
165
|
(authors_tr / :a).each do |a|
|
192
166
|
authors << a.inner_text
|
@@ -194,17 +168,17 @@ module Alexandria
|
|
194
168
|
end
|
195
169
|
|
196
170
|
# can we do better? get the City name?? or multiple publishers?
|
197
|
-
bibdata = doc %
|
171
|
+
bibdata = doc % "div#bibdata"
|
198
172
|
bibdata_table = bibdata % :table
|
199
|
-
publisher_row = bibdata_table %
|
173
|
+
publisher_row = bibdata_table % "th[text()*=Publisher]/.."
|
200
174
|
|
201
175
|
if publisher_row
|
202
|
-
publication_info = (publisher_row /
|
176
|
+
publication_info = (publisher_row / "td").last.inner_text
|
203
177
|
|
204
|
-
publication_info =~ if publication_info.index(
|
205
|
-
|
206
|
-
elsif publication_info.index(
|
207
|
-
|
178
|
+
publication_info =~ if publication_info.index(";")
|
179
|
+
/;\s*([^\d]+)\s*\d*/
|
180
|
+
elsif publication_info.index(":")
|
181
|
+
/:\s*([^;:,]+)/
|
208
182
|
else
|
209
183
|
/([^;,]+)/
|
210
184
|
end
|
@@ -217,35 +191,33 @@ module Alexandria
|
|
217
191
|
year = nil
|
218
192
|
end
|
219
193
|
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
log.warn { 'No ISBN found on page' }
|
228
|
-
end
|
194
|
+
isbn_row = doc % "tr#details-standardno"
|
195
|
+
if isbn_row
|
196
|
+
isbns = (isbn_row / "td").last.inner_text.split
|
197
|
+
isbn = Library.canonicalise_isbn(isbns.first)
|
198
|
+
else
|
199
|
+
log.warn { "No ISBN found on page" }
|
200
|
+
isbn = search_isbn
|
229
201
|
end
|
230
202
|
|
231
|
-
|
203
|
+
book_binding = "" # not given on WorldCat website (as far as I can tell)
|
232
204
|
|
233
|
-
book = Book.new(title, authors, isbn, publisher, year,
|
205
|
+
book = Book.new(title, authors, isbn, publisher, year, book_binding)
|
234
206
|
|
235
207
|
image_url = nil # hm, it's on the website, but uses JavaScript...
|
236
208
|
|
237
|
-
|
238
|
-
|
239
|
-
rescue => ex
|
209
|
+
[book, image_url]
|
210
|
+
rescue StandardError => ex
|
240
211
|
raise ex if ex.instance_of? NoResultsError
|
212
|
+
|
241
213
|
trace = ex.backtrace.join("\n> ")
|
242
|
-
log.warn
|
243
|
-
|
214
|
+
log.warn do
|
215
|
+
"Failed parsing search results for WorldCat " \
|
244
216
|
"#{ex.message} #{trace}"
|
245
|
-
|
217
|
+
end
|
246
218
|
raise NoResultsError
|
247
219
|
end
|
248
220
|
end
|
249
|
-
end
|
250
|
-
end
|
251
|
-
end
|
221
|
+
end
|
222
|
+
end
|
223
|
+
end
|