alexandria-book-collection-manager 0.6.9.pre1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.rubocop.yml +68 -0
- data/.rubocop_todo.yml +200 -0
- data/CHANGELOG.md +23 -0
- data/COPYING +339 -0
- data/ChangeLog.0 +3598 -0
- data/Gemfile +9 -0
- data/INSTALL.rdoc +156 -0
- data/PACKAGING +36 -0
- data/README.md +88 -0
- data/RELEASE_CHECKLIST +18 -0
- data/Rakefile +264 -0
- data/TODO +24 -0
- data/alexandria-book-collection-manager.gemspec +35 -0
- data/alexandria.desktop.in +10 -0
- data/bin/alexandria +82 -0
- data/doc/AUTHORS +60 -0
- data/doc/BUGS +31 -0
- data/doc/FAQ +369 -0
- data/doc/HACKING +19 -0
- data/doc/NEWS +341 -0
- data/doc/alexandria.1 +120 -0
- data/doc/cuecat_support.rdoc +67 -0
- data/lib/alexandria.rb +85 -0
- data/lib/alexandria/about.rb +82 -0
- data/lib/alexandria/book_providers.rb +411 -0
- data/lib/alexandria/book_providers/adlibris.rb +235 -0
- data/lib/alexandria/book_providers/amazon_aws.rb +261 -0
- data/lib/alexandria/book_providers/amazon_ecs_util.rb +405 -0
- data/lib/alexandria/book_providers/barnes_and_noble.rb +229 -0
- data/lib/alexandria/book_providers/bol_it.rb +162 -0
- data/lib/alexandria/book_providers/deastore.rb +277 -0
- data/lib/alexandria/book_providers/douban.rb +135 -0
- data/lib/alexandria/book_providers/ibs_it.rb +149 -0
- data/lib/alexandria/book_providers/mcu.rb +177 -0
- data/lib/alexandria/book_providers/proxis.rb +205 -0
- data/lib/alexandria/book_providers/pseudomarc.rb +185 -0
- data/lib/alexandria/book_providers/renaud.rb +142 -0
- data/lib/alexandria/book_providers/siciliano.rb +271 -0
- data/lib/alexandria/book_providers/thalia.rb +197 -0
- data/lib/alexandria/book_providers/web.rb +59 -0
- data/lib/alexandria/book_providers/webster_it.rb +173 -0
- data/lib/alexandria/book_providers/worldcat.rb +251 -0
- data/lib/alexandria/book_providers/z3950.rb +422 -0
- data/lib/alexandria/config.rb +8 -0
- data/lib/alexandria/console.rb +31 -0
- data/lib/alexandria/execution_queue.rb +96 -0
- data/lib/alexandria/export_library.rb +536 -0
- data/lib/alexandria/import_library.rb +316 -0
- data/lib/alexandria/import_library_csv.rb +270 -0
- data/lib/alexandria/logging.rb +159 -0
- data/lib/alexandria/models/book.rb +72 -0
- data/lib/alexandria/models/library.rb +714 -0
- data/lib/alexandria/net.rb +53 -0
- data/lib/alexandria/preferences.rb +324 -0
- data/lib/alexandria/scanners.rb +42 -0
- data/lib/alexandria/scanners/cuecat.rb +118 -0
- data/lib/alexandria/scanners/keyboard.rb +57 -0
- data/lib/alexandria/smart_library.rb +525 -0
- data/lib/alexandria/ui.rb +53 -0
- data/lib/alexandria/ui/builder_base.rb +43 -0
- data/lib/alexandria/ui/callbacks.rb +389 -0
- data/lib/alexandria/ui/completion_models.rb +228 -0
- data/lib/alexandria/ui/dialogs/about_dialog.rb +59 -0
- data/lib/alexandria/ui/dialogs/acquire_dialog.rb +640 -0
- data/lib/alexandria/ui/dialogs/alert_dialog.rb +68 -0
- data/lib/alexandria/ui/dialogs/bad_isbns_dialog.rb +43 -0
- data/lib/alexandria/ui/dialogs/barcode_animation.rb +159 -0
- data/lib/alexandria/ui/dialogs/book_properties_dialog.rb +210 -0
- data/lib/alexandria/ui/dialogs/book_properties_dialog_base.rb +432 -0
- data/lib/alexandria/ui/dialogs/export_dialog.rb +172 -0
- data/lib/alexandria/ui/dialogs/import_dialog.rb +205 -0
- data/lib/alexandria/ui/dialogs/misc_dialogs.rb +85 -0
- data/lib/alexandria/ui/dialogs/new_book_dialog.rb +639 -0
- data/lib/alexandria/ui/dialogs/new_book_dialog_manual.rb +153 -0
- data/lib/alexandria/ui/dialogs/new_smart_library_dialog.rb +67 -0
- data/lib/alexandria/ui/dialogs/preferences_dialog.rb +587 -0
- data/lib/alexandria/ui/dialogs/smart_library_properties_dialog.rb +56 -0
- data/lib/alexandria/ui/dialogs/smart_library_properties_dialog_base.rb +432 -0
- data/lib/alexandria/ui/dndable.rb +81 -0
- data/lib/alexandria/ui/gtk_thread_help.rb +88 -0
- data/lib/alexandria/ui/icons.rb +100 -0
- data/lib/alexandria/ui/iconview.rb +91 -0
- data/lib/alexandria/ui/iconview_tooltips.rb +162 -0
- data/lib/alexandria/ui/init.rb +93 -0
- data/lib/alexandria/ui/libraries_combo.rb +75 -0
- data/lib/alexandria/ui/listview.rb +310 -0
- data/lib/alexandria/ui/main_app.rb +67 -0
- data/lib/alexandria/ui/multi_drag_treeview.rb +150 -0
- data/lib/alexandria/ui/sidepane.rb +194 -0
- data/lib/alexandria/ui/sound.rb +107 -0
- data/lib/alexandria/ui/ui_manager.rb +1308 -0
- data/lib/alexandria/undo_manager.rb +78 -0
- data/lib/alexandria/utils.rb +30 -0
- data/lib/alexandria/version.rb +24 -0
- data/lib/alexandria/web_themes.rb +75 -0
- data/misc/sounds/README +15 -0
- data/misc/sounds/bad_scan.csd +62 -0
- data/misc/sounds/good_scan.csd +61 -0
- data/misc/sounds/scanning.csd +46 -0
- data/po/ChangeLog +488 -0
- data/po/Makefile +44 -0
- data/po/README +29 -0
- data/po/commit-po +72 -0
- data/po/cs.po +1437 -0
- data/po/cy.po +1521 -0
- data/po/de.po +1400 -0
- data/po/el.po +1379 -0
- data/po/es.po +1376 -0
- data/po/fr.po +1420 -0
- data/po/ga.po +1359 -0
- data/po/gl.po +1397 -0
- data/po/it.po +1406 -0
- data/po/ja.po +1355 -0
- data/po/mk.po +1373 -0
- data/po/nb.po +1386 -0
- data/po/nl.po +1405 -0
- data/po/pl.po +1373 -0
- data/po/pt.po +1398 -0
- data/po/pt_BR.po +1409 -0
- data/po/ru.po +1372 -0
- data/po/sk.po +1380 -0
- data/po/sv.po +1402 -0
- data/po/uk.po +1423 -0
- data/po/zh_TW.po +1394 -0
- data/schemas/alexandria.schemas +300 -0
- data/share/alexandria/glade/acquire_dialog__builder.glade +201 -0
- data/share/alexandria/glade/book_properties_dialog__builder.glade +910 -0
- data/share/alexandria/glade/main_app__builder.glade +229 -0
- data/share/alexandria/glade/new_book_dialog__builder.glade +379 -0
- data/share/alexandria/glade/preferences_dialog__builder.glade +733 -0
- data/share/alexandria/icons/alexandria.png +0 -0
- data/share/alexandria/icons/alexandria_small.png +0 -0
- data/share/alexandria/icons/book.png +0 -0
- data/share/alexandria/icons/book_icon.png +0 -0
- data/share/alexandria/icons/book_small.png +0 -0
- data/share/alexandria/icons/cuecat.png +0 -0
- data/share/alexandria/icons/cuecat_inactive.png +0 -0
- data/share/alexandria/icons/favorite_tag.png +0 -0
- data/share/alexandria/icons/less.png +0 -0
- data/share/alexandria/icons/library.png +0 -0
- data/share/alexandria/icons/library_small.png +0 -0
- data/share/alexandria/icons/lookup.png +0 -0
- data/share/alexandria/icons/more.png +0 -0
- data/share/alexandria/icons/no_cover.png +0 -0
- data/share/alexandria/icons/smart_library.png +0 -0
- data/share/alexandria/icons/smart_library_small.png +0 -0
- data/share/alexandria/icons/star_set.png +0 -0
- data/share/alexandria/icons/star_unset.png +0 -0
- data/share/alexandria/icons/view_as_icons.png +0 -0
- data/share/alexandria/icons/view_as_list.png +0 -0
- data/share/alexandria/ui/menus.xml +91 -0
- data/share/alexandria/ui/popups.xml +91 -0
- data/share/alexandria/web-themes/clean/clean.css +85 -0
- data/share/alexandria/web-themes/clean/preview.jpg +0 -0
- data/share/alexandria/web-themes/list/list.css +105 -0
- data/share/alexandria/web-themes/list/preview.jpg +0 -0
- data/share/app-icon/16x16/alexandria.png +0 -0
- data/share/app-icon/16x16/alexandria.svg +263 -0
- data/share/app-icon/22x22/alexandria.png +0 -0
- data/share/app-icon/22x22/alexandria.svg +465 -0
- data/share/app-icon/24x24/alexandria.png +0 -0
- data/share/app-icon/32x32/alexandria.png +0 -0
- data/share/app-icon/32x32/alexandria.svg +813 -0
- data/share/app-icon/32x32/alexandria.xpm +241 -0
- data/share/app-icon/48x48/alexandria.png +0 -0
- data/share/app-icon/scalable/alexandria.svg +700 -0
- data/share/gnome/help/alexandria/C/about.xml +44 -0
- data/share/gnome/help/alexandria/C/adding-books.xml +339 -0
- data/share/gnome/help/alexandria/C/alexandria.xml +185 -0
- data/share/gnome/help/alexandria/C/bugs.xml +18 -0
- data/share/gnome/help/alexandria/C/editing-book-properties.xml +124 -0
- data/share/gnome/help/alexandria/C/exporting.xml +81 -0
- data/share/gnome/help/alexandria/C/figures/adding_books_acquire_from_scanner_process.png +0 -0
- data/share/gnome/help/alexandria/C/figures/adding_books_add_by_isbn.png +0 -0
- data/share/gnome/help/alexandria/C/figures/adding_books_isbn_import.png +0 -0
- data/share/gnome/help/alexandria/C/figures/adding_books_manual_details.png +0 -0
- data/share/gnome/help/alexandria/C/figures/adding_books_rename_library_after_import.png +0 -0
- data/share/gnome/help/alexandria/C/figures/adding_books_search_results.png +0 -0
- data/share/gnome/help/alexandria/C/figures/editing_book_properties_info.png +0 -0
- data/share/gnome/help/alexandria/C/figures/editing_book_properties_loaning.png +0 -0
- data/share/gnome/help/alexandria/C/figures/exporting_information_html.png +0 -0
- data/share/gnome/help/alexandria/C/figures/getting_started_first_launched.png +0 -0
- data/share/gnome/help/alexandria/C/figures/searching_filtering_views_list_view.png +0 -0
- data/share/gnome/help/alexandria/C/figures/searching_filtering_views_list_view_search.png +0 -0
- data/share/gnome/help/alexandria/C/figures/settings_providers_new_z3950.png +0 -0
- data/share/gnome/help/alexandria/C/figures/smart_libraries_new_smart_library.png +0 -0
- data/share/gnome/help/alexandria/C/figures/working_with_libraries_library_pane.png +0 -0
- data/share/gnome/help/alexandria/C/getting-started.xml +154 -0
- data/share/gnome/help/alexandria/C/gnu-fdl-1.2.xml +543 -0
- data/share/gnome/help/alexandria/C/introduction.xml +142 -0
- data/share/gnome/help/alexandria/C/searching.xml +90 -0
- data/share/gnome/help/alexandria/C/settings.xml +140 -0
- data/share/gnome/help/alexandria/C/smart-libraries.xml +160 -0
- data/share/gnome/help/alexandria/C/working-with-libraries.xml +76 -0
- data/share/gnome/help/alexandria/ChangeLog +99 -0
- data/share/gnome/help/alexandria/fr/alexandria.xml +2292 -0
- data/share/gnome/help/alexandria/fr/figures/alexandria_add_button.png +0 -0
- data/share/gnome/help/alexandria/fr/figures/alexandria_add_by_isbn_1.png +0 -0
- data/share/gnome/help/alexandria/fr/figures/alexandria_add_by_search_1.png +0 -0
- data/share/gnome/help/alexandria/fr/figures/alexandria_add_manually.png +0 -0
- data/share/gnome/help/alexandria/fr/figures/alexandria_add_z3950.png +0 -0
- data/share/gnome/help/alexandria/fr/figures/alexandria_close_button.png +0 -0
- data/share/gnome/help/alexandria/fr/figures/alexandria_edit_info.png +0 -0
- data/share/gnome/help/alexandria/fr/figures/alexandria_export_web_page.png +0 -0
- data/share/gnome/help/alexandria/fr/figures/alexandria_importing.png +0 -0
- data/share/gnome/help/alexandria/fr/figures/alexandria_library_pane.png +0 -0
- data/share/gnome/help/alexandria/fr/figures/alexandria_list_view.png +0 -0
- data/share/gnome/help/alexandria/fr/figures/alexandria_list_view_search.png +0 -0
- data/share/gnome/help/alexandria/fr/figures/alexandria_loaning.png +0 -0
- data/share/gnome/help/alexandria/fr/figures/alexandria_main_window.png +0 -0
- data/share/gnome/help/alexandria/fr/figures/alexandria_remove_button.png +0 -0
- data/share/gnome/help/alexandria/ja/about.xml +33 -0
- data/share/gnome/help/alexandria/ja/adding-books.xml +314 -0
- data/share/gnome/help/alexandria/ja/alexandria.xml +172 -0
- data/share/gnome/help/alexandria/ja/bugs.xml +11 -0
- data/share/gnome/help/alexandria/ja/editing-book-properties.xml +100 -0
- data/share/gnome/help/alexandria/ja/exporting.xml +98 -0
- data/share/gnome/help/alexandria/ja/figures/adding_books_acquire_from_scanner_process.png +0 -0
- data/share/gnome/help/alexandria/ja/figures/adding_books_add_by_isbn.png +0 -0
- data/share/gnome/help/alexandria/ja/figures/adding_books_isbn_import.png +0 -0
- data/share/gnome/help/alexandria/ja/figures/adding_books_manual_details.png +0 -0
- data/share/gnome/help/alexandria/ja/figures/adding_books_rename_library_after_import.png +0 -0
- data/share/gnome/help/alexandria/ja/figures/adding_books_search_results.png +0 -0
- data/share/gnome/help/alexandria/ja/figures/editing_book_properties_info.png +0 -0
- data/share/gnome/help/alexandria/ja/figures/editing_book_properties_loaning.png +0 -0
- data/share/gnome/help/alexandria/ja/figures/exporting_information_html.png +0 -0
- data/share/gnome/help/alexandria/ja/figures/getting_started_first_launched.png +0 -0
- data/share/gnome/help/alexandria/ja/figures/searching_filtering_views_list_view.png +0 -0
- data/share/gnome/help/alexandria/ja/figures/searching_filtering_views_list_view_search.png +0 -0
- data/share/gnome/help/alexandria/ja/figures/settings_providers_new_z3950.png +0 -0
- data/share/gnome/help/alexandria/ja/figures/smart_libraries_new_smart_library.png +0 -0
- data/share/gnome/help/alexandria/ja/figures/working_with_libraries_library_pane.png +0 -0
- data/share/gnome/help/alexandria/ja/getting-started.xml +144 -0
- data/share/gnome/help/alexandria/ja/gnu-fdl-1.2.xml +541 -0
- data/share/gnome/help/alexandria/ja/introduction.xml +134 -0
- data/share/gnome/help/alexandria/ja/searching.xml +104 -0
- data/share/gnome/help/alexandria/ja/settings.xml +129 -0
- data/share/gnome/help/alexandria/ja/smart-libraries.xml +140 -0
- data/share/gnome/help/alexandria/ja/working-with-libraries.xml +88 -0
- data/share/menu/alexandria +7 -0
- data/share/omf/alexandria/alexandria-C.omf.in +27 -0
- data/share/omf/alexandria/alexandria-fr.omf.in +30 -0
- data/share/sounds/alexandria/bad_scan.ogg +0 -0
- data/share/sounds/alexandria/bad_scan.wav +0 -0
- data/share/sounds/alexandria/good_scan.ogg +0 -0
- data/share/sounds/alexandria/good_scan.wav +0 -0
- data/share/sounds/alexandria/scanning.ogg +0 -0
- data/share/sounds/alexandria/scanning.wav +0 -0
- data/spec/alexandria/library_spec.rb +205 -0
- data/spec/alexandria/preferences_spec.rb +22 -0
- data/spec/alexandria/scanners/cuecat_spec.rb +68 -0
- data/spec/alexandria/smart_library_spec.rb +22 -0
- data/spec/alexandria/ui/dialogs_spec.rb +90 -0
- data/spec/alexandria/ui/iconview_spec.rb +27 -0
- data/spec/alexandria/ui/listview_spec.rb +28 -0
- data/spec/alexandria/ui/main_app_spec.rb +48 -0
- data/spec/alexandria/ui/sidepane_spec.rb +27 -0
- data/spec/alexandria/ui/ui_manager_spec.rb +26 -0
- data/spec/alexandria/ui/ui_utilities_spec.rb +60 -0
- data/spec/alexandria/utilities_spec.rb +50 -0
- data/spec/data/libraries/0.6.1-noisbn/My Library/0201398257.yaml +10 -0
- data/spec/data/libraries/0.6.1-noisbn/My Library/1565920007.yaml +10 -0
- data/spec/data/libraries/0.6.1/My Library/0192812173.yaml +9 -0
- data/spec/data/libraries/0.6.1/My Library/0201398257.cover +0 -0
- data/spec/data/libraries/0.6.1/My Library/0201398257.yaml +10 -0
- data/spec/data/libraries/0.6.1/My Library/1565920007.yaml +10 -0
- data/spec/data/libraries/0.6.2/My Library/9780140266146.cover +0 -0
- data/spec/data/libraries/0.6.2/My Library/9780140266146.yaml +16 -0
- data/spec/data/libraries/0.6.2/My Library/9780140278781.cover +0 -0
- data/spec/data/libraries/0.6.2/My Library/9780140278781.yaml +21 -0
- data/spec/data/libraries/0.6.2/My Library/9780571147168.cover +0 -0
- data/spec/data/libraries/0.6.2/My Library/9780571147168.yaml +20 -0
- data/spec/data/libraries/0.6.2/My Library/9780575079038.cover +0 -0
- data/spec/data/libraries/0.6.2/My Library/9780575079038.yaml +20 -0
- data/spec/data/libraries/0.6.2/My Library/9780755322800.cover +0 -0
- data/spec/data/libraries/0.6.2/My Library/9780755322800.yaml +20 -0
- data/spec/spec_helper.rb +46 -0
- data/tasks/rdoc.rake +6 -0
- data/tasks/setup.rb +30 -0
- data/tasks/spec.rake +29 -0
- data/tasks/test.rake +38 -0
- data/test/application_test.rb +39 -0
- data/test/book_test.rb +34 -0
- data/test/data/isbns.txt +3 -0
- data/test/isbn_test.rb +68 -0
- data/test/providers_test.rb +254 -0
- data/test/test_helper.rb +42 -0
- data/util/rake/fileinstall.rb +313 -0
- data/util/rake/gettextgenerate.rb +158 -0
- data/util/rake/omfgenerate.rb +79 -0
- metadata +452 -0
@@ -0,0 +1,197 @@
|
|
1
|
+
# Copyright (C) 2009 Cathal Mc Ginley
|
2
|
+
# Copyright (C) 2014 Matijs van Zuijlen
|
3
|
+
#
|
4
|
+
# Alexandria is free software; you can redistribute it and/or
|
5
|
+
# modify it under the terms of the GNU General Public License as
|
6
|
+
# published by the Free Software Foundation; either version 2 of the
|
7
|
+
# License, or (at your option) any later version.
|
8
|
+
#
|
9
|
+
# Alexandria is distributed in the hope that it will be useful,
|
10
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
12
|
+
# General Public License for more details.
|
13
|
+
#
|
14
|
+
# You should have received a copy of the GNU General Public
|
15
|
+
# License along with Alexandria; see the file COPYING. If not,
|
16
|
+
# write to the Free Software Foundation, Inc., 51 Franklin Street,
|
17
|
+
# Fifth Floor, Boston, MA 02110-1301 USA.
|
18
|
+
|
19
|
+
# http://de.wikipedia.org/wiki/Thalia_%28Buchhandel%29
|
20
|
+
# Thalia.de bought the Austrian book trade chain Amadeus
|
21
|
+
|
22
|
+
# New Thalia provider, taken from Palatina MetaDataSource and modified
|
23
|
+
# for Alexandria. (21 Dec 2009)
|
24
|
+
|
25
|
+
require 'net/http'
|
26
|
+
require 'cgi'
|
27
|
+
require 'alexandria/book_providers/web'
|
28
|
+
|
29
|
+
module Alexandria
|
30
|
+
class BookProviders
|
31
|
+
class ThaliaProvider < WebsiteBasedProvider
|
32
|
+
include Alexandria::Logging
|
33
|
+
|
34
|
+
SITE = 'http://www.thalia.de'
|
35
|
+
BASE_SEARCH_URL = "#{SITE}/shop/bde_bu_hg_startseite/suche/?%s=%s" # type,term
|
36
|
+
|
37
|
+
def initialize
|
38
|
+
super('Thalia', 'Thalia (Germany)')
|
39
|
+
# no preferences for the moment
|
40
|
+
prefs.read
|
41
|
+
end
|
42
|
+
|
43
|
+
def url(book)
|
44
|
+
create_search_uri(SEARCH_BY_ISBN, book.isbn)
|
45
|
+
end
|
46
|
+
|
47
|
+
def search(criterion, type)
|
48
|
+
req = create_search_uri(type, criterion)
|
49
|
+
puts req if $DEBUG
|
50
|
+
html_data = transport.get_response(URI.parse(req))
|
51
|
+
if type == SEARCH_BY_ISBN
|
52
|
+
parse_result_data(html_data.body, criterion)
|
53
|
+
else
|
54
|
+
results = parse_search_result_data(html_data.body)
|
55
|
+
raise NoResultsError if results.empty?
|
56
|
+
results.map { |result| get_book_from_search_result(result) }
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def create_search_uri(search_type, search_term)
|
61
|
+
search_type_code = {
|
62
|
+
SEARCH_BY_ISBN => 'sq',
|
63
|
+
SEARCH_BY_AUTHORS => 'sa', # Autor
|
64
|
+
SEARCH_BY_TITLE => 'st', # Titel
|
65
|
+
SEARCH_BY_KEYWORD => 'ssw' # Schlagwort
|
66
|
+
}[search_type] or ''
|
67
|
+
search_type_code = CGI.escape(search_type_code)
|
68
|
+
search_term_encoded = search_term
|
69
|
+
if search_type == SEARCH_BY_ISBN
|
70
|
+
# search_term_encoded = search_term.as_isbn_13
|
71
|
+
search_term_encoded = Library.canonicalise_isbn(search_term) # check this!
|
72
|
+
else
|
73
|
+
search_term_encoded = CGI.escape(search_term)
|
74
|
+
end
|
75
|
+
BASE_SEARCH_URL % [search_type_code, search_term_encoded]
|
76
|
+
end
|
77
|
+
|
78
|
+
def parse_search_result_data(html)
|
79
|
+
doc = html_to_doc(html)
|
80
|
+
book_search_results = []
|
81
|
+
results_divs = doc / 'div.articlePresentationSearchCH'
|
82
|
+
results_divs.each do |div|
|
83
|
+
result = {}
|
84
|
+
title_link = div % 'div.articleText/h2/a'
|
85
|
+
result[:title] = title_link.inner_html
|
86
|
+
result[:lookup_url] = title_link['href']
|
87
|
+
book_search_results << result
|
88
|
+
end
|
89
|
+
book_search_results
|
90
|
+
end
|
91
|
+
|
92
|
+
def data_from_label(node, label_text)
|
93
|
+
label_node = node % "strong[text()*='#{label_text}']"
|
94
|
+
if (item_node = label_node.parent)
|
95
|
+
data = ''
|
96
|
+
item_node.children.each do |n|
|
97
|
+
if n.text?
|
98
|
+
data += n.to_html
|
99
|
+
end
|
100
|
+
end
|
101
|
+
data.strip
|
102
|
+
else
|
103
|
+
''
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
def get_book_from_search_result(result)
|
108
|
+
log.debug { "Fetching book from #{result[:lookup_url]}" }
|
109
|
+
html_data = transport.get_response(URI.parse(result[:lookup_url]))
|
110
|
+
parse_result_data(html_data.body, 'noisbn', true)
|
111
|
+
end
|
112
|
+
|
113
|
+
def parse_result_data(html, isbn, recursing = false)
|
114
|
+
doc = html_to_doc(html)
|
115
|
+
|
116
|
+
results_divs = doc / 'div.articlePresentationSearchCH'
|
117
|
+
unless results_divs.empty?
|
118
|
+
if recursing
|
119
|
+
# already recursing, avoid doing so endlessly second time
|
120
|
+
# around *should* lead to a book description, not a result
|
121
|
+
# list
|
122
|
+
return
|
123
|
+
end
|
124
|
+
# ISBN-lookup results in multiple results (trying to be
|
125
|
+
# useful, such as for new editions e.g. 9780974514055
|
126
|
+
# "Programming Ruby" )
|
127
|
+
results = parse_search_result_data(html)
|
128
|
+
isbn10 = Library.canonicalise_isbn(isbn)
|
129
|
+
# e.g. .../dave_thomas/ISBN0-9745140-5-5/ID6017044.html
|
130
|
+
chosen = results.first # fallback!
|
131
|
+
results.each do |rslt|
|
132
|
+
if rslt[:lookup_url] =~ /\/ISBN(\d+[\d-]*)\//
|
133
|
+
if Regexp.last_match[1].gsub('-', '') == isbn10
|
134
|
+
chosen = rslt
|
135
|
+
break
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
html_data = transport.get_response(URI.parse(chosen[:lookup_url]))
|
140
|
+
return parse_result_data(html_data.body, isbn, true)
|
141
|
+
end
|
142
|
+
|
143
|
+
begin
|
144
|
+
if (div = doc % 'div#contentFull')
|
145
|
+
title_img = ((div % :h2) / :img).first
|
146
|
+
title = title_img['alt']
|
147
|
+
|
148
|
+
# note, the following img also has alt="von Author, Author..."
|
149
|
+
|
150
|
+
if (author_h = doc % 'h3[text()*="Mehr von"]') # "More from..." links
|
151
|
+
authors = []
|
152
|
+
author_links = author_h.parent / :a
|
153
|
+
author_links.each do |a|
|
154
|
+
if a['href'] =~ /BUCH\/sa/
|
155
|
+
# 'sa' means search author, there may also be 'ssw' (search keyword) links
|
156
|
+
authors << a.inner_text[0..-2].strip
|
157
|
+
# NOTE stripping the little >> character here...
|
158
|
+
end
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
162
|
+
item_details = doc % 'ul.itemDataList'
|
163
|
+
isbns = []
|
164
|
+
isbns << data_from_label(item_details, 'EAN')
|
165
|
+
isbns << data_from_label(item_details, 'ISBN')
|
166
|
+
|
167
|
+
year = nil
|
168
|
+
date = data_from_label(item_details, 'Erschienen:')
|
169
|
+
if date =~ /([\d]{4})/
|
170
|
+
year = Regexp.last_match[1].to_i
|
171
|
+
end
|
172
|
+
|
173
|
+
binding = data_from_label(item_details, 'Einband')
|
174
|
+
|
175
|
+
publisher = data_from_label(item_details, 'Erschienen bei:')
|
176
|
+
|
177
|
+
book = Book.new(title, authors, isbns.first,
|
178
|
+
publisher, year, binding)
|
179
|
+
|
180
|
+
image_url = nil
|
181
|
+
if (image_link = doc % 'a[@id=itemPicStart]')
|
182
|
+
image_url = image_link['href']
|
183
|
+
end
|
184
|
+
|
185
|
+
return [book, image_url]
|
186
|
+
|
187
|
+
end
|
188
|
+
rescue => ex
|
189
|
+
trace = ex.backtrace.join("\n> ")
|
190
|
+
log.warn {'Failed parsing search results for Thalia ' \
|
191
|
+
"#{ex.message} #{trace}" }
|
192
|
+
raise NoResultsError
|
193
|
+
end
|
194
|
+
end
|
195
|
+
end
|
196
|
+
end
|
197
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
#
|
3
|
+
# Copyright (C) 2009 Cathal Mc Ginley
|
4
|
+
# Copyright (C) 2014 Matijs van Zuijlen
|
5
|
+
#
|
6
|
+
# Alexandria is free software; you can redistribute it and/or
|
7
|
+
# modify it under the terms of the GNU General Public License as
|
8
|
+
# published by the Free Software Foundation; either version 2 of the
|
9
|
+
# License, or (at your option) any later version.
|
10
|
+
#
|
11
|
+
# Alexandria is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
+
# General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public
|
17
|
+
# License along with Alexandria; see the file COPYING. If not,
|
18
|
+
# write to the Free Software Foundation, Inc., 51 Franklin Street,
|
19
|
+
# Fifth Floor, Boston, MA 02110-1301 USA.
|
20
|
+
|
21
|
+
require 'hpricot'
|
22
|
+
require 'htmlentities'
|
23
|
+
|
24
|
+
module Alexandria
|
25
|
+
class BookProviders
|
26
|
+
class WebsiteBasedProvider < GenericProvider
|
27
|
+
def initialize(name, fullname = nil)
|
28
|
+
super(name, fullname)
|
29
|
+
@htmlentities = HTMLEntities.new
|
30
|
+
end
|
31
|
+
|
32
|
+
def html_to_doc(html, source_data_charset = 'ISO-8859-1')
|
33
|
+
html.force_encoding source_data_charset
|
34
|
+
utf8_html = html.encode('utf-8')
|
35
|
+
normalized_html = @htmlentities.decode(utf8_html)
|
36
|
+
Hpricot(normalized_html)
|
37
|
+
end
|
38
|
+
|
39
|
+
## from Palatina
|
40
|
+
def text_of(node)
|
41
|
+
if node.nil?
|
42
|
+
nil
|
43
|
+
else
|
44
|
+
if node.text?
|
45
|
+
node.to_html
|
46
|
+
elsif node.elem?
|
47
|
+
if node.children.nil?
|
48
|
+
return nil
|
49
|
+
else
|
50
|
+
node_text = node.children.map { |n| text_of(n) }.join
|
51
|
+
node_text.strip.squeeze(' ')
|
52
|
+
end
|
53
|
+
end
|
54
|
+
# node.inner_html.strip
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
@@ -0,0 +1,173 @@
|
|
1
|
+
# Copyright (C) 2007 Marco Costantini
|
2
|
+
# Copyright (C) 2014 Matijs van Zuijlen
|
3
|
+
# based on ibs_it.rb by Claudio Belotti
|
4
|
+
#
|
5
|
+
# Alexandria is free software; you can redistribute it and/or
|
6
|
+
# modify it under the terms of the GNU General Public License as
|
7
|
+
# published by the Free Software Foundation; either version 2 of the
|
8
|
+
# License, or (at your option) any later version.
|
9
|
+
#
|
10
|
+
# Alexandria is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
13
|
+
# General Public License for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public
|
16
|
+
# License along with Alexandria; see the file COPYING. If not,
|
17
|
+
# write to the Free Software Foundation, Inc., 51 Franklin Street,
|
18
|
+
# Fifth Floor, Boston, MA 02110-1301 USA.
|
19
|
+
|
20
|
+
require 'fileutils'
|
21
|
+
require 'net/http'
|
22
|
+
require 'open-uri'
|
23
|
+
# require 'cgi'
|
24
|
+
|
25
|
+
module Alexandria
|
26
|
+
class BookProviders
|
27
|
+
class Webster_itProvider < GenericProvider
|
28
|
+
BASE_URI = 'http://www.libreriauniversitaria.it' # also "http://www.webster.it"
|
29
|
+
CACHE_DIR = File.join(Alexandria::Library::DIR, '.webster_it_cache')
|
30
|
+
REFERER = BASE_URI
|
31
|
+
LOCALE = 'BIT' # used only for search by title/author/keyword. possible are: "BIT", "BUS", "BUK", "BDE", "MIT"
|
32
|
+
def initialize
|
33
|
+
super('Webster_it', 'Webster (Italy)')
|
34
|
+
FileUtils.mkdir_p(CACHE_DIR) unless File.exist?(CACHE_DIR)
|
35
|
+
# no preferences for the moment
|
36
|
+
at_exit { clean_cache }
|
37
|
+
end
|
38
|
+
|
39
|
+
def search(criterion, type)
|
40
|
+
criterion = criterion.convert('ISO-8859-15', 'UTF-8')
|
41
|
+
req = BASE_URI + '/'
|
42
|
+
req += case type
|
43
|
+
when SEARCH_BY_ISBN
|
44
|
+
'isbn/' # "#{LOCALE}/"
|
45
|
+
|
46
|
+
when SEARCH_BY_TITLE
|
47
|
+
"c_search.php?noinput=1&shelf=#{LOCALE}&title_query="
|
48
|
+
|
49
|
+
when SEARCH_BY_AUTHORS
|
50
|
+
"c_search.php?noinput=1&shelf=#{LOCALE}&author_query="
|
51
|
+
|
52
|
+
when SEARCH_BY_KEYWORD
|
53
|
+
"c_search.php?noinput=1&shelf=#{LOCALE}&subject_query="
|
54
|
+
|
55
|
+
else
|
56
|
+
raise InvalidSearchTypeError
|
57
|
+
|
58
|
+
end
|
59
|
+
|
60
|
+
req += CGI.escape(criterion)
|
61
|
+
p req if $DEBUG
|
62
|
+
data = transport.get(URI.parse(req))
|
63
|
+
if type == SEARCH_BY_ISBN
|
64
|
+
to_book(data) # rescue raise NoResultsError
|
65
|
+
else
|
66
|
+
begin
|
67
|
+
results = []
|
68
|
+
each_book_page(data) do |code, _title|
|
69
|
+
results << to_book(transport.get(URI.parse(BASE_URI + "/#{LOCALE}/" + code)))
|
70
|
+
end
|
71
|
+
return results
|
72
|
+
rescue
|
73
|
+
raise NoResultsError
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
def url(book)
|
79
|
+
BASE_URI + '/isbn/' + book.isbn
|
80
|
+
end
|
81
|
+
|
82
|
+
#######
|
83
|
+
private
|
84
|
+
#######
|
85
|
+
|
86
|
+
def to_book(data)
|
87
|
+
raise NoResultsError if /<font color="\#ffffff"><b>Prodotto non esistente<\/b><\/font>/.match(data)
|
88
|
+
data = data.convert('UTF-8', 'ISO-8859-15')
|
89
|
+
|
90
|
+
md = /<li><span class="product_label">Titolo:<\/span><span class="product_text"> ([^<]+)/.match(data)
|
91
|
+
raise unless md
|
92
|
+
title = CGI.unescape(md[1].strip)
|
93
|
+
if (md = /<span class="product_heading_volume">([^<]+)/.match(data))
|
94
|
+
title += ' ' + CGI.unescape(md[1].strip)
|
95
|
+
end
|
96
|
+
|
97
|
+
authors = []
|
98
|
+
if (md = /<li><span class="product_label">Autor[ei]:<\/span> <span class="product_text">(<a href="[^>]+">([^<]+)<\/a>,? ?)+<\/span><li>/.match(data))
|
99
|
+
this = CGI.unescape(md[0].strip)
|
100
|
+
authors = this.scan(/<a href="[^>]+">([^<]+)<\/a>,?/)
|
101
|
+
authors = authors.map { |author| author[0] }
|
102
|
+
# puts this
|
103
|
+
# md[1].strip.split(', ').each { |a| authors << CGI.unescape(a.strip) }
|
104
|
+
end
|
105
|
+
|
106
|
+
md = /<li><span class="product_label">ISBN:<\/span> <span class="product_text">([^<]+)/.match(data)
|
107
|
+
raise unless md
|
108
|
+
isbn = Library.canonicalise_ean(md[1].strip)
|
109
|
+
|
110
|
+
# raise unless
|
111
|
+
md = /<li><span class="product_label">Editore:<\/span> <span class="product_text"><a href="[^>]+>([^<]+)/.match(data)
|
112
|
+
publisher = CGI.unescape(md[1].strip) or md
|
113
|
+
|
114
|
+
if (md = /<li><span class="product_label">Pagine:<\/span> <span class="product_text">([^<]+)/.match(data))
|
115
|
+
edition = CGI.unescape(md[1].strip) + ' p.'
|
116
|
+
else
|
117
|
+
edition = nil
|
118
|
+
end
|
119
|
+
|
120
|
+
publish_year = nil
|
121
|
+
if (md = /<li><span class="product_label">Data di Pubblicazione:<\/span> <span class="product_text">([^<]+)/.match(data))
|
122
|
+
publish_year = CGI.unescape(md[1].strip)[-4..-1].to_i
|
123
|
+
publish_year = nil if publish_year == 0
|
124
|
+
end
|
125
|
+
|
126
|
+
if data =~ /javascript:popImage/ and (md = /<img border="0" alt="[^"]+" src="([^"]+)/.match(data))
|
127
|
+
cover_url = BASE_URI + md[1].strip
|
128
|
+
# use "p" instead of "g" for smaller image
|
129
|
+
if cover_url[-5] == 103
|
130
|
+
cover_url[-5] = 112
|
131
|
+
end
|
132
|
+
|
133
|
+
cover_filename = isbn + '.tmp'
|
134
|
+
Dir.chdir(CACHE_DIR) do
|
135
|
+
begin
|
136
|
+
cover_data = open(cover_url, 'Referer' => REFERER).read
|
137
|
+
rescue OpenURI::HTTPError
|
138
|
+
cover_data = nil
|
139
|
+
end
|
140
|
+
if cover_data
|
141
|
+
File.open(cover_filename, 'w') do |file|
|
142
|
+
file.write cover_data
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
medium_cover = CACHE_DIR + '/' + cover_filename
|
148
|
+
if File.size(medium_cover) > 0
|
149
|
+
puts medium_cover + ' has non-0 size' if $DEBUG
|
150
|
+
return [Book.new(title, authors, isbn, publisher, publish_year, edition), medium_cover]
|
151
|
+
end
|
152
|
+
puts medium_cover + ' has 0 size, removing ...' if $DEBUG
|
153
|
+
File.delete(medium_cover)
|
154
|
+
end
|
155
|
+
[Book.new(title, authors, isbn, publisher, publish_year, edition)]
|
156
|
+
end
|
157
|
+
|
158
|
+
def each_book_page(data)
|
159
|
+
raise if data.scan(/<tr ><td width="10%" align="center""> <a href="#{LOCALE}\/([^\/]+)/) { |a| yield a }.empty?
|
160
|
+
end
|
161
|
+
|
162
|
+
def clean_cache
|
163
|
+
# FIXME begin ... rescue ... end?
|
164
|
+
Dir.chdir(CACHE_DIR) do
|
165
|
+
Dir.glob('*.tmp') do |file|
|
166
|
+
puts 'removing ' + file if $DEBUG
|
167
|
+
File.delete(file)
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
172
|
+
end
|
173
|
+
end
|
@@ -0,0 +1,251 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
#
|
3
|
+
# Copyright (C) 2009 Cathal Mc Ginley
|
4
|
+
# Copyright (C) 2011, 2014 Matijs van Zuijlen
|
5
|
+
#
|
6
|
+
# Alexandria is free software; you can redistribute it and/or
|
7
|
+
# modify it under the terms of the GNU General Public License as
|
8
|
+
# published by the Free Software Foundation; either version 2 of the
|
9
|
+
# License, or (at your option) any later version.
|
10
|
+
#
|
11
|
+
# Alexandria is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
+
# General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public
|
17
|
+
# License along with Alexandria; see the file COPYING. If not,
|
18
|
+
# write to the Free Software Foundation, Inc., 51 Franklin Street,
|
19
|
+
# Fifth Floor, Boston, MA 02110-1301 USA.
|
20
|
+
|
21
|
+
# http://en.wikipedia.org/wiki/WorldCat
|
22
|
+
# See http://www.oclc.org/worldcat/policies/terms/
|
23
|
+
|
24
|
+
# New WorldCat provider, taken from the Palatina MetaDataSource and
|
25
|
+
# modified to fit the structure of Alexandria book providers.
|
26
|
+
# (25 Feb 2009)
|
27
|
+
#
|
28
|
+
# Updated from Palatina, to reflect changes in the worldcat website.
|
29
|
+
# (1 Sep 2009)
|
30
|
+
|
31
|
+
require 'cgi'
|
32
|
+
require 'alexandria/net'
|
33
|
+
require 'alexandria/book_providers/web'
|
34
|
+
|
35
|
+
module Alexandria
|
36
|
+
class BookProviders
|
37
|
+
class WorldCatProvider < WebsiteBasedProvider
|
38
|
+
include Alexandria::Logging
|
39
|
+
|
40
|
+
# Root URL of the WorldCat website; relative result links are appended to it.
SITE = 'http://www.worldcat.org'.freeze
# Format string for an advanced search request. The two %s slots receive
# the (escaped) search-type prefix and the search term, in that order.
BASE_SEARCH_URL = "#{SITE}/search?q=%s%s&qt=advanced".freeze # type, term
|
42
|
+
|
43
|
+
# Sets up the provider under the name 'WorldCat' (passed twice to the
# superclass constructor) and then loads its stored preferences.
# NOTE(review): the two superclass arguments are presumably the internal
# name and the display name -- confirm against WebsiteBasedProvider.
def initialize
  super('WorldCat', 'WorldCat')
  # An "enabled" preference was once registered here and is kept for
  # reference:
  # prefs.add("enabled", _("Enabled"), true, [true,false])
  prefs.read
end
|
48
|
+
|
49
|
+
# Runs a WorldCat search.
#
# criterion:: the search term (an ISBN, title, author or keyword string).
# type::      one of the SEARCH_BY_* constants.
#
# For SEARCH_BY_ISBN the response body is handed to parse_result_data
# together with the criterion and that method's result is returned.
# For every other type the result list is parsed and each entry is
# resolved through get_book_from_search_result; the mapped array is
# returned.
#
# Raises NoResultsError when a non-ISBN search yields an empty result list
# (parse_result_data may raise it as well).
def search(criterion, type)
  req = create_search_uri(type, criterion)
  # Reported through the class logger, like the rest of this provider,
  # instead of printing to stdout when the global $DEBUG flag is set.
  log.debug { "Requesting #{req}" }
  html_data = transport.get_response(URI.parse(req))
  # Note: I tried to use Alexandria::WWWAgent,
  # but this caused failures here (empty pages...)
  # find out how the requests differ

  if type == SEARCH_BY_ISBN
    parse_result_data(html_data.body, criterion)
  else
    results = parse_search_result_data(html_data.body)
    raise NoResultsError if results.empty?

    results.map { |result| get_book_from_search_result(result) }
  end
end
|
68
|
+
|
69
|
+
# Builds the WorldCat ISBN-search URL for the given book.
#
# book:: object responding to #isbn.
#
# Returns the URL string, or nil (after logging a warning) when building
# the URL raises a StandardError.
def url(book)
  begin
    create_search_uri(SEARCH_BY_ISBN, book.isbn)
  rescue StandardError => e
    log.warn { "Cannot create url for book #{book}; #{e.message}" }
    nil
  end
end
|
75
|
+
|
76
|
+
private
|
77
|
+
|
78
|
+
# Builds the WorldCat advanced-search URL for a search type and term.
#
# search_type:: one of the SEARCH_BY_* constants; any other value is
#               treated like a keyword search (no prefix).
# search_term:: the raw term typed by the user.
#
# Returns BASE_SEARCH_URL filled in with the CGI-escaped type prefix and
# the encoded term.
def create_search_uri(search_type, search_term)
  prefixes = { SEARCH_BY_ISBN => 'isbn:',
               SEARCH_BY_AUTHORS => 'au:',
               SEARCH_BY_TITLE => 'ti:',
               SEARCH_BY_KEYWORD => '' }
  # Hash#fetch supplies the empty default. The previous `... or ''` bound
  # looser than the assignment, so the default was never applied and an
  # unknown type passed nil on to CGI.escape.
  search_type_code = CGI.escape(prefixes.fetch(search_type, ''))

  search_term_encoded =
    if search_type == SEARCH_BY_ISBN
      # TODO: this branch is not CGI-escaped; it relies on
      # Library.canonicalise_ean to return a clean value.
      Library.canonicalise_ean(search_term) # isbn-13
    else
      CGI.escape(search_term)
    end

  BASE_SEARCH_URL % [search_type_code, search_term_encoded]
end
|
93
|
+
|
94
|
+
# Downloads the detail page of one search hit and parses it.
#
# result:: hash with a :url entry, as built by parse_search_result_data.
#
# Returns whatever parse_result_data returns for the fetched page body.
def get_book_from_search_result(result)
  book_url = result[:url]
  log.debug { "Fetching book from #{book_url}" }
  response = transport.get_response(URI.parse(book_url))
  parse_result_data(response.body)
end
|
99
|
+
|
100
|
+
# Extracts the book entries from a WorldCat result-list page.
#
# html:: HTML source of the result list.
#
# Returns an array of hashes, each with :title (text of the entry's name
# cell) and :url (SITE plus the entry's link). Only entries whose type
# icon source matches /icon-bks/ are kept. Entries without a name cell or
# without a link are skipped: they have no detail page to fetch, and
# previously produced a bogus URL equal to SITE.
#
# Parsing errors are logged as warnings and the entries collected up to
# that point are returned; nothing is raised from here.
def parse_search_result_data(html)
  doc = html_to_doc(html, 'UTF-8')
  book_search_results = []
  begin
    result_cells = doc / 'td.result/div.name/..'
    result_cells.each do |td|
      type_icon = td % 'div.type/img.icn'
      next unless type_icon && type_icon['src'] =~ /icon-bks/

      name_div = td % 'div.name'
      anchor = name_div && name_div % :a
      next unless anchor && anchor['href']

      book_search_results << { title: name_div.inner_text,
                               url: "#{SITE}#{anchor['href']}" }
    end
  rescue StandardError => ex
    trace = ex.backtrace.join("\n> ")
    log.warn { "Failed parsing search results for WorldCat #{ex.message} #{trace}" }
  end
  book_search_results
end
|
129
|
+
|
130
|
+
# Parses a WorldCat response page into a book.
#
# html::        HTML source of the page.
# search_isbn:: ISBN the lookup was made for, or nil. When given it is
#               used as the book's ISBN instead of the one on the page.
# recursing::   true when called for a page reached from a result list;
#               a further result list is then refused to avoid looping.
#
# Returns a two-element array [book, cover_url]. For a detail page the
# cover URL is always nil.
#
# Three kinds of page are handled:
# * a "no results" page raises NoResultsError;
# * a result list fetches the listed book pages in turn and returns the
#   first one accepted by the ISBN check (or simply the first one when no
#   search_isbn was given);
# * a detail page is parsed for title, authors, publisher, year and ISBN.
#
# Raises NoResultsError when nothing usable is found; any other
# StandardError raised while parsing is logged and converted to
# NoResultsError.
def parse_result_data(html, search_isbn = nil, recursing = false)
  doc = html_to_doc(html, 'UTF-8')

  begin
    if doc % 'div#div-results-none'
      log.debug { 'WorldCat reports no results' }
      raise NoResultsError
    end

    if doc % 'table.table-results'
      if recursing
        log.warn { 'Infinite loop prevented redirecting through WorldCat' }
        raise NoResultsError
      end
      log.info { 'Found multiple results for lookup: checking each' }
      first_result = nil
      parse_search_result_data(html).each do |rslt|
        log.debug { "checking #{rslt[:url]}" }
        rslt2 = transport.get_response(URI.parse(rslt[:url]))

        book, cover_url = parse_result_data(rslt2.body, search_isbn, true)
        first_result ||= [book, cover_url]

        log.debug { "got book #{book}" }

        # no constraint to match isbn, just return first result
        return [book, cover_url] unless search_isbn

        # NOTE(review): the recursive call above is given search_isbn and
        # the detail-page branch below adopts it as the book's ISBN, so
        # this comparison appears to succeed for the first result every
        # time -- confirm whether the page's own ISBN should be compared.
        search_isbn_canon = Library.canonicalise_ean(search_isbn)
        rslt_isbn_canon = Library.canonicalise_ean(book.isbn)
        if search_isbn_canon == rslt_isbn_canon
          log.info { "book #{book} is a match" }
          return [book, cover_url]
        end
        log.debug { 'not a match, checking next' }
      end

      # A result list without any usable book entry used to fall through
      # and return nil; report it like every other "nothing found" case.
      raise NoResultsError if first_result.nil?

      # gone through all and no ISBN match, so just return first result
      log.info { 'no more results to check. Returning first result, just an approximation' }
      return first_result
    end

    title_header = doc % 'h1.title'
    title = title_header.inner_text if title_header
    unless title
      log.warn { 'Unexpected lack of title from WorldCat lookup' }
      raise NoResultsError
    end
    log.info { "Found book #{title} at WorldCat" }

    authors_tr = doc % 'tr#details-allauthors'
    authors = authors_tr ? (authors_tr / :a).map { |a| a.inner_text } : []

    # can we do better? get the City name?? or multiple publishers?
    # Every step is nil-checked: a page lacking the bibliographic table is
    # treated the same as one lacking the publisher row.
    bibdata = doc % 'div#bibdata'
    bibdata_table = bibdata && bibdata % :table
    publisher_row = bibdata_table && bibdata_table % 'th[text()*=Publisher]/..'
    publication_cell = publisher_row && (publisher_row / 'td').last

    publisher = nil
    year = nil
    if publication_cell
      publication_info = publication_cell.inner_text

      publisher_pattern =
        if publication_info.index(';')
          /;[\s]*([^\d]+)[\s]*[\d]*/
        elsif publication_info.index(':')
          /:[\s]*([^;:,]+)/
        else
          /([^;,]+)/
        end

      # Explicit MatchData checks: a pattern that does not match leaves the
      # field nil instead of raising on a nil last-match (which used to
      # turn e.g. a missing year into NoResultsError).
      publisher_match = publisher_pattern.match(publication_info)
      publisher = publisher_match[1] if publisher_match
      year_match = /([12][0-9]{3})/.match(publication_info)
      year = year_match[1].to_i if year_match
    end

    isbn = search_isbn
    unless isbn
      isbn_row = doc % 'tr#details-standardno' # #bibdata_table % 'th[text()*=ISBN]/..'
      if isbn_row
        isbns = (isbn_row / 'td').last.inner_text.split
        isbn = Library.canonicalise_isbn(isbns.first)
      else
        log.warn { 'No ISBN found on page' }
      end
    end

    # not given on WorldCat website (as far as I can tell)
    # (named book_binding so the local does not shadow Kernel#binding)
    book_binding = ''

    book = Book.new(title, authors, isbn, publisher, year, book_binding)

    image_url = nil # hm, it's on the website, but uses JavaScript...

    [book, image_url]
  rescue NoResultsError
    # Already the error callers expect; pass it through untouched.
    raise
  rescue StandardError => ex
    trace = ex.backtrace.join("\n> ")
    log.warn { "Failed parsing search results for WorldCat #{ex.message} #{trace}" }
    raise NoResultsError
  end
end
|
249
|
+
end # class WorldCatProvider
|
250
|
+
end # class BookProviders
|
251
|
+
end # module Alexandria
|