alexandria-book-collection-manager 0.7.5 → 0.7.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +9 -0
- data/.github/workflows/ruby.yml +72 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +65 -30
- data/.rubocop_todo.yml +49 -165
- data/.simplecov +5 -2
- data/CHANGELOG.md +64 -0
- data/ChangeLog.0 +19 -19
- data/INSTALL.md +26 -16
- data/README.md +31 -35
- data/Rakefile +18 -16
- data/alexandria-book-collection-manager.gemspec +35 -29
- data/doc/FAQ +2 -2
- data/doc/dependency_decisions.yml +22 -3
- data/lib/alexandria/about.rb +1 -1
- data/lib/alexandria/book_providers/bl_provider.rb +88 -0
- data/lib/alexandria/book_providers/douban.rb +2 -2
- data/lib/alexandria/book_providers/loc_provider.rb +38 -0
- data/lib/alexandria/book_providers/pseudomarc.rb +61 -71
- data/lib/alexandria/book_providers/sbn_provider.rb +108 -0
- data/lib/alexandria/book_providers/{thalia.rb → thalia_provider.rb} +37 -74
- data/lib/alexandria/book_providers/web.rb +2 -2
- data/lib/alexandria/book_providers/worldcat.rb +34 -38
- data/lib/alexandria/book_providers/z3950_provider.rb +199 -0
- data/lib/alexandria/book_providers.rb +48 -65
- data/lib/alexandria/default_preferences.rb +2 -1
- data/lib/alexandria/execution_queue.rb +13 -12
- data/lib/alexandria/export_library.rb +21 -22
- data/lib/alexandria/image_fetcher.rb +25 -0
- data/lib/alexandria/import_library.rb +46 -70
- data/lib/alexandria/import_library_csv.rb +16 -16
- data/lib/alexandria/library_sort_order.rb +3 -1
- data/lib/alexandria/library_store.rb +19 -20
- data/lib/alexandria/logging.rb +5 -9
- data/lib/alexandria/models/book.rb +15 -2
- data/lib/alexandria/models/library.rb +31 -35
- data/lib/alexandria/net.rb +1 -2
- data/lib/alexandria/preferences.rb +27 -33
- data/lib/alexandria/scanners/cue_cat.rb +6 -6
- data/lib/alexandria/scanners/keyboard.rb +1 -1
- data/lib/alexandria/scanners.rb +2 -2
- data/lib/alexandria/smart_library.rb +22 -26
- data/lib/alexandria/ui/about_dialog.rb +1 -1
- data/lib/alexandria/ui/acquire_dialog.rb +15 -19
- data/lib/alexandria/ui/alert_dialog.rb +36 -19
- data/lib/alexandria/ui/bad_isbns_dialog.rb +13 -9
- data/lib/alexandria/ui/barcode_animation.rb +6 -6
- data/lib/alexandria/ui/book_properties_dialog.rb +2 -3
- data/lib/alexandria/ui/book_properties_dialog_base.rb +35 -137
- data/lib/alexandria/ui/calendar_popup.rb +58 -0
- data/lib/alexandria/ui/callbacks.rb +144 -123
- data/lib/alexandria/ui/completion_models.rb +2 -6
- data/lib/alexandria/ui/confirm_erase_dialog.rb +1 -1
- data/lib/alexandria/ui/conflict_while_copying_dialog.rb +2 -2
- data/lib/alexandria/ui/error_dialog.rb +1 -1
- data/lib/alexandria/ui/export_dialog.rb +19 -18
- data/lib/alexandria/ui/icons.rb +34 -40
- data/lib/alexandria/ui/iconview_tooltips.rb +40 -53
- data/lib/alexandria/ui/import_dialog.rb +49 -48
- data/lib/alexandria/ui/init.rb +14 -12
- data/lib/alexandria/ui/keep_bad_isbn_dialog.rb +2 -2
- data/lib/alexandria/ui/libraries_combo.rb +10 -9
- data/lib/alexandria/ui/listview.rb +6 -7
- data/lib/alexandria/ui/main_app.rb +2 -2
- data/lib/alexandria/ui/multi_drag_treeview.rb +5 -7
- data/lib/alexandria/ui/new_book_dialog.rb +63 -65
- data/lib/alexandria/ui/new_book_dialog_manual.rb +1 -1
- data/lib/alexandria/ui/new_provider_dialog.rb +12 -11
- data/lib/alexandria/ui/new_smart_library_dialog.rb +39 -27
- data/lib/alexandria/ui/preferences_dialog.rb +25 -84
- data/lib/alexandria/ui/provider_preferences_base_dialog.rb +10 -6
- data/lib/alexandria/ui/provider_preferences_dialog.rb +5 -5
- data/lib/alexandria/ui/really_delete_dialog.rb +2 -2
- data/lib/alexandria/ui/sidepane_manager.rb +38 -38
- data/lib/alexandria/ui/skip_entry_dialog.rb +3 -2
- data/lib/alexandria/ui/smart_library_properties_dialog.rb +35 -36
- data/lib/alexandria/ui/smart_library_properties_dialog_base.rb +61 -244
- data/lib/alexandria/ui/smart_library_rule_box.rb +119 -0
- data/lib/alexandria/ui/sound.rb +4 -6
- data/lib/alexandria/ui/ui_manager.rb +80 -83
- data/lib/alexandria/ui.rb +7 -7
- data/lib/alexandria/version.rb +2 -2
- data/lib/alexandria/web_themes.rb +15 -15
- data/lib/alexandria.rb +2 -2
- data/po/cs.po +947 -865
- data/po/cy.po +913 -864
- data/po/de.po +961 -865
- data/po/el.po +956 -861
- data/po/es.po +952 -857
- data/po/fr.po +950 -865
- data/po/ga.po +866 -819
- data/po/gl.po +946 -861
- data/po/it.po +945 -858
- data/po/ja.po +921 -836
- data/po/mk.po +953 -858
- data/po/nb.po +932 -847
- data/po/nl.po +955 -849
- data/po/pl.po +999 -963
- data/po/pt.po +946 -850
- data/po/pt_BR.po +944 -859
- data/po/ru.po +959 -868
- data/po/sk.po +950 -863
- data/po/sv.po +944 -859
- data/po/uk.po +925 -846
- data/po/zh_TW.po +926 -841
- data/schemas/alexandria.schemas +1 -1
- data/share/alexandria/glade/main_app__builder.glade +6 -21
- data/share/gnome/help/alexandria/C/adding-books.xml +3 -4
- data/share/gnome/help/alexandria/C/introduction.xml +0 -16
- data/share/gnome/help/alexandria/C/searching.xml +1 -4
- data/share/gnome/help/alexandria/C/settings.xml +0 -30
- data/share/gnome/help/alexandria/C/smart-libraries.xml +2 -2
- data/share/gnome/help/alexandria/C/working-with-libraries.xml +1 -1
- data/share/gnome/help/alexandria/fr/alexandria.xml +5 -160
- data/share/gnome/help/alexandria/ja/adding-books.xml +1 -1
- data/share/gnome/help/alexandria/ja/introduction.xml +0 -15
- data/share/gnome/help/alexandria/ja/searching.xml +3 -7
- data/share/gnome/help/alexandria/ja/settings.xml +0 -27
- data/share/gnome/help/alexandria/ja/smart-libraries.xml +1 -1
- data/spec/alexandria/book_providers/bl_provider_spec.rb +13 -0
- data/spec/alexandria/book_providers/loc_provider_spec.rb +17 -0
- data/spec/alexandria/book_providers/sbn_provider_spec.rb +13 -0
- data/spec/alexandria/book_providers/thalia_provider_spec.rb +119 -0
- data/spec/alexandria/book_providers/world_cat_provider_spec.rb +160 -0
- data/spec/alexandria/book_providers_spec.rb +0 -154
- data/spec/alexandria/console_spec.rb +0 -5
- data/spec/alexandria/export_library_spec.rb +27 -38
- data/spec/alexandria/library_spec.rb +76 -46
- data/spec/alexandria/preferences_spec.rb +29 -3
- data/spec/alexandria/scanners/cue_cat_spec.rb +1 -1
- data/spec/alexandria/ui/about_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/acquire_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/alert_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/bad_isbns_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/book_properties_dialog_spec.rb +47 -5
- data/spec/alexandria/ui/confirm_erase_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/conflict_while_copying_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/error_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/export_dialog_spec.rb +25 -4
- data/spec/alexandria/ui/icons_spec.rb +26 -0
- data/spec/alexandria/ui/iconview_spec.rb +1 -1
- data/spec/alexandria/ui/import_dialog_spec.rb +35 -3
- data/spec/alexandria/ui/keep_bad_isbn_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/main_app_spec.rb +1 -1
- data/spec/alexandria/ui/new_book_dialog_manual_spec.rb +39 -3
- data/spec/alexandria/ui/new_provider_dialog_spec.rb +19 -3
- data/spec/alexandria/ui/new_smart_library_dialog_spec.rb +28 -3
- data/spec/alexandria/ui/preferences_dialog_spec.rb +2 -2
- data/spec/alexandria/ui/provider_preferences_dialog_spec.rb +23 -8
- data/spec/alexandria/ui/really_delete_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/sidepane_manager_spec.rb +2 -2
- data/spec/alexandria/ui/skip_entry_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/smart_library_properties_dialog_spec.rb +37 -6
- data/spec/alexandria/ui/ui_manager_spec.rb +116 -2
- data/spec/data/libraries/0.6.2/My Library/9780571147168.yaml +2 -0
- data/spec/end_to_end/basic_run_spec.rb +3 -8
- data/spec/fixtures/cover.jpg +0 -0
- data/spec/spec_helper.rb +47 -3
- data/tasks/spec.rake +3 -5
- data/util/rake/fileinstall.rb +16 -15
- data/util/rake/omfgenerate.rb +1 -1
- metadata +141 -52
- data/.travis.yml +0 -39
- data/lib/alexandria/book_providers/adlibris.rb +0 -196
- data/lib/alexandria/book_providers/amazon_aws.rb +0 -252
- data/lib/alexandria/book_providers/amazon_ecs_util.rb +0 -388
- data/lib/alexandria/book_providers/barnes_and_noble.rb +0 -209
- data/lib/alexandria/book_providers/proxis.rb +0 -175
- data/lib/alexandria/book_providers/siciliano.rb +0 -257
- data/lib/alexandria/book_providers/z3950.rb +0 -415
- data/spec/alexandria/ui/ui_utilities_spec.rb +0 -62
- data/spec/alexandria/utilities_spec.rb +0 -52
@@ -41,9 +41,9 @@ module Alexandria
|
|
41
41
|
notes: ["520", "a"]
|
42
42
|
}.freeze
|
43
43
|
|
44
|
-
def self.get_fields(data, type, stripping,
|
44
|
+
def self.get_fields(data, type, stripping, mappings = USMARC_MAPPINGS)
|
45
45
|
field = ""
|
46
|
-
|
46
|
+
mappings[type][1..mappings[type].length - 1].each do |part|
|
47
47
|
if data.first[part]
|
48
48
|
part_data = data.first[part].strip
|
49
49
|
if part_data =~ stripping
|
@@ -58,69 +58,68 @@ module Alexandria
|
|
58
58
|
field
|
59
59
|
end
|
60
60
|
|
61
|
-
def self.marc_text_to_book(marc,
|
61
|
+
def self.marc_text_to_book(marc, mappings = USMARC_MAPPINGS)
|
62
62
|
details = marc_text_to_details(marc)
|
63
|
-
|
64
|
-
title = nil
|
65
|
-
title_data = details[m[:title][0]]
|
66
|
-
if title_data
|
67
|
-
title_data_all = get_fields(title_data, :title, %r{(.*)[/:]$}, m)
|
68
|
-
title = title_data_all if title_data_all
|
69
|
-
end
|
63
|
+
return if details.empty?
|
70
64
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
author = Regexp.last_match[1] if author =~ /(.*),$/
|
78
|
-
authors << author
|
79
|
-
end
|
80
|
-
end
|
65
|
+
title = nil
|
66
|
+
title_data = details[mappings[:title][0]]
|
67
|
+
if title_data
|
68
|
+
title_data_all = get_fields(title_data, :title, %r{(.*)[/:]$}, mappings)
|
69
|
+
title = title_data_all if title_data_all
|
70
|
+
end
|
81
71
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
72
|
+
authors = []
|
73
|
+
author_data = details[mappings[:authors][0]]
|
74
|
+
author_data&.each do |ad|
|
75
|
+
author = ad[mappings[:authors][1]]
|
76
|
+
if author
|
77
|
+
author = author.strip
|
78
|
+
author = Regexp.last_match[1] if author =~ /(.*),$/
|
79
|
+
authors << author
|
87
80
|
end
|
81
|
+
end
|
88
82
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
83
|
+
isbn = nil
|
84
|
+
binding = nil
|
85
|
+
isbn_data = details[mappings[:isbn][0]]
|
86
|
+
if isbn_data && isbn_data.first[mappings[:isbn][1]] =~ /([-0-9xX]+)/
|
87
|
+
isbn = Regexp.last_match[1]
|
88
|
+
end
|
95
89
|
|
96
|
-
|
97
|
-
|
98
|
-
|
90
|
+
binding_data = details[mappings[:binding][0]]
|
91
|
+
if binding_data &&
|
92
|
+
binding_data.first[mappings[:binding][1]] =~ /([a-zA-Z][a-z\s]+[a-z])/
|
93
|
+
binding = Regexp.last_match[1]
|
94
|
+
end
|
99
95
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
year = publication_data.first[m[:year][1]]
|
104
|
-
year = Regexp.last_match[1].to_i if year =~ /(\d+)/
|
105
|
-
end
|
96
|
+
publisher = nil
|
97
|
+
publisher_data = details[mappings[:publisher][0]]
|
98
|
+
publisher = publisher_data.first[mappings[:publisher][1]] if publisher_data
|
106
99
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
100
|
+
year = nil
|
101
|
+
publication_data = details[mappings[:year][0]]
|
102
|
+
if publication_data
|
103
|
+
year = publication_data.first[mappings[:year][1]]
|
104
|
+
year = Regexp.last_match[1].to_i if year =~ /(\d+)/
|
105
|
+
end
|
113
106
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
107
|
+
notes = ""
|
108
|
+
notes_data = details[mappings[:notes][0]]
|
109
|
+
notes_data&.each do |note|
|
110
|
+
txt = note[mappings[:notes][1]]
|
111
|
+
notes += txt if txt
|
112
|
+
end
|
118
113
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
book
|
114
|
+
if title.nil? && isbn.nil?
|
115
|
+
# probably didn't understand the MARC dialect
|
116
|
+
return nil
|
123
117
|
end
|
118
|
+
|
119
|
+
book = Alexandria::Book.new(title, authors, isbn,
|
120
|
+
publisher, year, binding)
|
121
|
+
book.notes = notes unless notes.empty?
|
122
|
+
book
|
124
123
|
end
|
125
124
|
|
126
125
|
def self.marc_text_to_details(marc)
|
@@ -132,31 +131,22 @@ module Alexandria
|
|
132
131
|
|
133
132
|
this_line_data = {}
|
134
133
|
|
135
|
-
# puts code
|
136
|
-
# puts data
|
137
134
|
d_idx = 0
|
138
135
|
while d_idx < data.size
|
139
|
-
d_str = data[d_idx
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
# puts " " + $2
|
148
|
-
# puts idx
|
149
|
-
d_idx += idx + 2 # (2 extra to push beyond this '$a' etc.)
|
150
|
-
else
|
151
|
-
break
|
152
|
-
end
|
136
|
+
d_str = data[d_idx..]
|
137
|
+
idx = d_str =~ /\$([a-z]) ([^$]+)/
|
138
|
+
break unless idx
|
139
|
+
|
140
|
+
sub_code = Regexp.last_match[1]
|
141
|
+
sub_data = Regexp.last_match[2]
|
142
|
+
this_line_data[sub_code] = sub_data
|
143
|
+
d_idx += idx + 2 # (2 extra to push beyond this '$a' etc.)
|
153
144
|
end
|
154
145
|
|
155
146
|
unless this_line_data.empty?
|
156
147
|
details[code] = [] unless details.key?(code)
|
157
148
|
details[code] << this_line_data
|
158
149
|
end
|
159
|
-
|
160
150
|
end
|
161
151
|
end
|
162
152
|
details
|
@@ -0,0 +1,108 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This file is part of Alexandria.
|
4
|
+
#
|
5
|
+
# See the file README.md for authorship and licensing information.
|
6
|
+
|
7
|
+
require "alexandria/book_providers/z3950_provider"
|
8
|
+
|
9
|
+
module Alexandria
|
10
|
+
class BookProviders
|
11
|
+
class SBNProvider < Z3950Provider
|
12
|
+
# http://sbnonline.sbn.it/
|
13
|
+
# http://it.wikipedia.org/wiki/ICCU
|
14
|
+
unabstract
|
15
|
+
|
16
|
+
include GetText
|
17
|
+
GetText.bindtextdomain(Alexandria::TEXTDOMAIN, charset: "UTF-8")
|
18
|
+
|
19
|
+
def initialize
|
20
|
+
super("SBN", "Servizio Bibliotecario Nazionale (Italy)")
|
21
|
+
prefs.variable_named("hostname").default_value = "opac.sbn.it"
|
22
|
+
prefs.variable_named("port").default_value = 3950
|
23
|
+
prefs.variable_named("database").default_value = "nopac"
|
24
|
+
# supported 'USMARC', 'UNIMARC' , 'SUTRS'
|
25
|
+
prefs.variable_named("record_syntax").default_value = "USMARC"
|
26
|
+
prefs.variable_named("charset").default_value = "ISO-8859-1"
|
27
|
+
prefs.read
|
28
|
+
end
|
29
|
+
|
30
|
+
def url(book)
|
31
|
+
"http://sbnonline.sbn.it/cgi-bin/zgw/BRIEF.pl?displayquery=" \
|
32
|
+
"%253CB%253E%253Cfont%2520color%253D%2523000064%253E" \
|
33
|
+
"Codice%2520ISBN%253C%2FB%253E%253C%2Ffont%253E%2520" \
|
34
|
+
"contiene%2520%2522%2520%253CFONT%2520COLOR%253Dred%253E" +
|
35
|
+
canonicalise_isbn_with_dashes(book.isbn) +
|
36
|
+
"%253C%2FFONT%253E%2522&session=&zurl=opac" \
|
37
|
+
"&zquery=%281%3D7+4%3D2+2%3D3+5%3D100+6%3D1+3%3D3+%22" +
|
38
|
+
canonicalise_isbn_with_dashes(book.isbn) +
|
39
|
+
"%22%29&language=it&maxentries=10&target=0&position=1"
|
40
|
+
rescue StandardError => ex
|
41
|
+
log.warn { "Cannot create url for book #{book}; #{ex.message}" }
|
42
|
+
nil
|
43
|
+
end
|
44
|
+
|
45
|
+
private
|
46
|
+
|
47
|
+
def canonicalise_criterion(criterion, _type)
|
48
|
+
canonicalise_isbn_with_dashes(criterion)
|
49
|
+
end
|
50
|
+
|
51
|
+
def request_count(_type)
|
52
|
+
0
|
53
|
+
end
|
54
|
+
|
55
|
+
def canonicalise_isbn_with_dashes(isbn)
|
56
|
+
# The reference for the position of the dashes is
|
57
|
+
# http://www.isbn-international.org/converter/ranges.htm
|
58
|
+
|
59
|
+
isbn = Alexandria::Library.canonicalise_isbn(isbn)
|
60
|
+
|
61
|
+
if isbn[0..1] == "88"
|
62
|
+
# Italian speaking area
|
63
|
+
if isbn > "8895000" && (isbn <= "8899999996")
|
64
|
+
isbn[0..1] + "-" + isbn[2..6] + "-" + isbn[7..8] + "-" + isbn[9..9]
|
65
|
+
elsif isbn > "88900000"
|
66
|
+
isbn[0..1] + "-" + isbn[2..7] + "-" + isbn[8..8] + "-" + isbn[9..9]
|
67
|
+
elsif isbn > "8885000"
|
68
|
+
isbn[0..1] + "-" + isbn[2..6] + "-" + isbn[7..8] + "-" + isbn[9..9]
|
69
|
+
elsif isbn > "886000"
|
70
|
+
isbn[0..1] + "-" + isbn[2..5] + "-" + isbn[6..8] + "-" + isbn[9..9]
|
71
|
+
elsif isbn > "88200"
|
72
|
+
isbn[0..1] + "-" + isbn[2..4] + "-" + isbn[5..8] + "-" + isbn[9..9]
|
73
|
+
elsif isbn > "8800"
|
74
|
+
isbn[0..1] + "-" + isbn[2..3] + "-" + isbn[4..8] + "-" + isbn[9..9]
|
75
|
+
else
|
76
|
+
raise _("Invalid ISBN")
|
77
|
+
end
|
78
|
+
|
79
|
+
else
|
80
|
+
isbn
|
81
|
+
end
|
82
|
+
end
|
83
|
+
#
|
84
|
+
# Remarks about SBN
|
85
|
+
#
|
86
|
+
# This provider requires that the value of conn.count is 0.
|
87
|
+
# It's a Yaz option "Number of records to be retrieved".
|
88
|
+
# This provider requires conn.element_set_name to be set to 'F'.
|
89
|
+
# It's a Yaz option "Element-Set name of records".
|
90
|
+
# See http://www.indexdata.dk/yaz/doc/zoom.resultsets.tkl
|
91
|
+
#
|
92
|
+
# Dashes:
|
93
|
+
# this database requires that Italian books are searched with dashes :(
|
94
|
+
# However, they also have books with dashes in the wrong positions, for
|
95
|
+
# instance 88-061-4934-2
|
96
|
+
#
|
97
|
+
# References:
|
98
|
+
# http://opac.internetculturale.it/cgi-bin/main.cgi?type=field
|
99
|
+
# http://www.internetculturale.it/
|
100
|
+
# http://sbnonline.sbn.it/zgw/homeit.html
|
101
|
+
# http://www.iccu.sbn.it/genera.jsp?id=124
|
102
|
+
# with link at http://www.iccu.sbn.it/upload/documenti/cartecsbn.pdf
|
103
|
+
# http://www.loc.gov/cgi-bin/zgstart?ACTION=INIT&FORM_HOST_PORT=/prod/www/data/z3950/iccu.html,opac.sbn.it,2100
|
104
|
+
# http://gwz.cilea.it/cgi-bin/reportOpac.cgi
|
105
|
+
#
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
@@ -1,27 +1,13 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
#
|
4
|
-
# Copyright (C) 2014 Matijs van Zuijlen
|
3
|
+
# This file is part of Alexandria.
|
5
4
|
#
|
6
|
-
#
|
7
|
-
# modify it under the terms of the GNU General Public License as
|
8
|
-
# published by the Free Software Foundation; either version 2 of the
|
9
|
-
# License, or (at your option) any later version.
|
10
|
-
#
|
11
|
-
# Alexandria is distributed in the hope that it will be useful,
|
12
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
-
# General Public License for more details.
|
15
|
-
#
|
16
|
-
# You should have received a copy of the GNU General Public
|
17
|
-
# License along with Alexandria; see the file COPYING. If not,
|
18
|
-
# write to the Free Software Foundation, Inc., 51 Franklin Street,
|
19
|
-
# Fifth Floor, Boston, MA 02110-1301 USA.
|
5
|
+
# See the file README.md for authorship and licensing information.
|
20
6
|
|
21
7
|
# http://de.wikipedia.org/wiki/Thalia_%28Buchhandel%29
|
22
8
|
# Thalia.de bought the Austrian book trade chain Amadeus
|
23
9
|
|
24
|
-
# New
|
10
|
+
# New Thalia provider, taken from Palatina MetaDataSource and modified
|
25
11
|
# for Alexandria. (21 Dec 2009)
|
26
12
|
|
27
13
|
require "net/http"
|
@@ -31,9 +17,9 @@ require "alexandria/book_providers/web"
|
|
31
17
|
module Alexandria
|
32
18
|
class BookProviders
|
33
19
|
class ThaliaProvider < WebsiteBasedProvider
|
34
|
-
include
|
20
|
+
include Logging
|
35
21
|
|
36
|
-
SITE = "
|
22
|
+
SITE = "https://www.thalia.de"
|
37
23
|
BASE_SEARCH_URL = "#{SITE}/shop/bde_bu_hg_startseite/suche/?%s=%s" # type,term
|
38
24
|
|
39
25
|
def initialize
|
@@ -48,7 +34,7 @@ module Alexandria
|
|
48
34
|
|
49
35
|
def search(criterion, type)
|
50
36
|
req = create_search_uri(type, criterion)
|
51
|
-
|
37
|
+
log.debug { req }
|
52
38
|
html_data = transport.get_response(URI.parse(req))
|
53
39
|
if type == SEARCH_BY_ISBN
|
54
40
|
parse_result_data(html_data.body, criterion)
|
@@ -80,40 +66,36 @@ module Alexandria
|
|
80
66
|
def parse_search_result_data(html)
|
81
67
|
doc = html_to_doc(html)
|
82
68
|
book_search_results = []
|
83
|
-
|
84
|
-
|
69
|
+
|
70
|
+
results_items = doc / "ul.weitere-formate li.format"
|
71
|
+
|
72
|
+
results_items.each do |item|
|
85
73
|
result = {}
|
86
|
-
|
87
|
-
result[:
|
88
|
-
result[:lookup_url] = title_link["href"]
|
74
|
+
item_link = item % "a"
|
75
|
+
result[:lookup_url] = "#{SITE}#{item_link['href']}"
|
89
76
|
book_search_results << result
|
90
77
|
end
|
91
78
|
book_search_results
|
92
79
|
end
|
93
80
|
|
94
81
|
def data_from_label(node, label_text)
|
95
|
-
label_node = node % "
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
end
|
101
|
-
data.strip
|
102
|
-
else
|
103
|
-
""
|
104
|
-
end
|
82
|
+
label_node = node % "th[text()*='#{label_text}']"
|
83
|
+
return "" unless label_node
|
84
|
+
|
85
|
+
item_node = label_node.parent % "td"
|
86
|
+
item_node.inner_text.strip
|
105
87
|
end
|
106
88
|
|
107
89
|
def get_book_from_search_result(result)
|
108
90
|
log.debug { "Fetching book from #{result[:lookup_url]}" }
|
109
91
|
html_data = transport.get_response(URI.parse(result[:lookup_url]))
|
110
|
-
parse_result_data(html_data.body, "noisbn", true)
|
92
|
+
parse_result_data(html_data.body, "noisbn", recursing: true)
|
111
93
|
end
|
112
94
|
|
113
|
-
def parse_result_data(html, isbn, recursing
|
95
|
+
def parse_result_data(html, isbn, recursing: false)
|
114
96
|
doc = html_to_doc(html)
|
115
97
|
|
116
|
-
results_divs = doc / "
|
98
|
+
results_divs = doc / "ul.weitere-formate"
|
117
99
|
unless results_divs.empty?
|
118
100
|
if recursing
|
119
101
|
# already recursing; stop here to avoid recursing endlessly
|
@@ -122,73 +104,54 @@ module Alexandria
|
|
122
104
|
return
|
123
105
|
end
|
124
106
|
|
125
|
-
# ISBN-lookup results in multiple results
|
126
|
-
# useful, such as for new editions e.g. 9780974514055
|
127
|
-
# "Programming Ruby" )
|
107
|
+
# ISBN-lookup results in multiple results
|
128
108
|
results = parse_search_result_data(html)
|
129
|
-
isbn10 = Library.canonicalise_isbn(isbn)
|
130
|
-
# e.g. .../dave_thomas/ISBN0-9745140-5-5/ID6017044.html
|
131
109
|
chosen = results.first # fallback!
|
132
|
-
results.each do |rslt|
|
133
|
-
if rslt[:lookup_url] =~ %r{/ISBN(\d+[\d-]*)/}
|
134
|
-
if Regexp.last_match[1].delete("-") == isbn10
|
135
|
-
chosen = rslt
|
136
|
-
break
|
137
|
-
end
|
138
|
-
end
|
139
|
-
end
|
140
110
|
html_data = transport.get_response(URI.parse(chosen[:lookup_url]))
|
141
|
-
return parse_result_data(html_data.body, isbn, true)
|
111
|
+
return parse_result_data(html_data.body, isbn, recursing: true)
|
142
112
|
end
|
143
113
|
|
144
114
|
begin
|
145
|
-
if (div = doc % "
|
146
|
-
|
147
|
-
title = title_img["alt"]
|
148
|
-
|
149
|
-
# note, the following img also has alt="von Author, Author..."
|
115
|
+
if (div = doc % "section#sbe-product-details")
|
116
|
+
title = div["data-titel"]
|
150
117
|
|
151
|
-
if (
|
118
|
+
if (author_p = doc % "p.aim-author")
|
152
119
|
authors = []
|
153
|
-
author_links =
|
120
|
+
author_links = author_p / :a
|
154
121
|
author_links.each do |a|
|
155
|
-
|
156
|
-
# 'sa' means search author, there may also be 'ssw' (search keyword) links
|
157
|
-
authors << a.inner_text[0..-2].strip
|
158
|
-
# NOTE stripping the little >> character here...
|
159
|
-
end
|
122
|
+
authors << a.inner_text.strip
|
160
123
|
end
|
161
124
|
end
|
162
125
|
|
163
|
-
item_details = doc % "
|
126
|
+
item_details = doc % "section.artikeldetails"
|
164
127
|
isbns = []
|
165
128
|
isbns << data_from_label(item_details, "EAN")
|
166
129
|
isbns << data_from_label(item_details, "ISBN")
|
130
|
+
isbns.reject!(&:empty?)
|
167
131
|
|
168
132
|
year = nil
|
169
|
-
date = data_from_label(item_details, "
|
170
|
-
year = Regexp.last_match[1].to_i if date =~ /(
|
133
|
+
date = data_from_label(item_details, "Erscheinungsdatum")
|
134
|
+
year = Regexp.last_match[1].to_i if date =~ /(\d{4})/
|
171
135
|
|
172
|
-
|
136
|
+
book_binding = data_from_label(item_details, "Einband")
|
173
137
|
|
174
|
-
publisher = data_from_label(item_details, "
|
138
|
+
publisher = data_from_label(item_details, "Verlag")
|
175
139
|
|
176
140
|
book = Book.new(title, authors, isbns.first,
|
177
|
-
publisher, year,
|
141
|
+
publisher, year, book_binding)
|
178
142
|
|
179
143
|
image_url = nil
|
180
|
-
if (
|
181
|
-
image_url =
|
144
|
+
if (image = doc % "section.imagesPreview img")
|
145
|
+
image_url = image["src"]
|
182
146
|
end
|
183
147
|
|
184
148
|
[book, image_url]
|
185
|
-
|
186
149
|
end
|
187
150
|
rescue StandardError => ex
|
188
151
|
trace = ex.backtrace.join("\n> ")
|
189
152
|
log.warn do
|
190
153
|
"Failed parsing search results for Thalia " \
|
191
|
-
|
154
|
+
"#{ex.message} #{trace}"
|
192
155
|
end
|
193
156
|
raise NoResultsError
|
194
157
|
end
|
@@ -4,7 +4,7 @@
|
|
4
4
|
#
|
5
5
|
# See the file README.md for authorship and licensing information.
|
6
6
|
|
7
|
-
require "
|
7
|
+
require "nokogiri"
|
8
8
|
require "htmlentities"
|
9
9
|
|
10
10
|
module Alexandria
|
@@ -19,7 +19,7 @@ module Alexandria
|
|
19
19
|
html.force_encoding source_data_charset
|
20
20
|
utf8_html = html.encode("utf-8")
|
21
21
|
normalized_html = @htmlentities.decode(utf8_html)
|
22
|
-
|
22
|
+
Nokogiri.parse(normalized_html)
|
23
23
|
end
|
24
24
|
|
25
25
|
## from Palatina
|
@@ -22,7 +22,7 @@ require "alexandria/book_providers/web"
|
|
22
22
|
module Alexandria
|
23
23
|
class BookProviders
|
24
24
|
class WorldCatProvider < WebsiteBasedProvider
|
25
|
-
include
|
25
|
+
include Logging
|
26
26
|
|
27
27
|
SITE = "https://www.worldcat.org"
|
28
28
|
BASE_SEARCH_URL = "#{SITE}/search?q=%s%s&qt=advanced" # type, term
|
@@ -76,11 +76,11 @@ module Alexandria
|
|
76
76
|
doc = html_to_doc(html, "UTF-8")
|
77
77
|
book_search_results = []
|
78
78
|
begin
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
type_icon =
|
83
|
-
next unless type_icon && type_icon["src"]
|
79
|
+
result_divs = doc / "td.result/div.name"
|
80
|
+
result_divs.each do |div|
|
81
|
+
td = div.parent
|
82
|
+
type_icon = td % "div.type/img.icn"
|
83
|
+
next unless type_icon && type_icon["src"].include?("icon-bks")
|
84
84
|
|
85
85
|
name_div = td % "div.name"
|
86
86
|
title = name_div.inner_text
|
@@ -97,7 +97,7 @@ module Alexandria
|
|
97
97
|
trace = ex.backtrace.join("\n> ")
|
98
98
|
log.warn do
|
99
99
|
"Failed parsing search results for WorldCat " \
|
100
|
-
|
100
|
+
"#{ex.message} #{trace}"
|
101
101
|
end
|
102
102
|
end
|
103
103
|
book_search_results
|
@@ -129,22 +129,20 @@ module Alexandria
|
|
129
129
|
html2 = rslt2.body
|
130
130
|
|
131
131
|
book, cover_url = parse_result_data(html2, search_isbn, true)
|
132
|
-
first_result = [book, cover_url] if first_result.nil?
|
133
132
|
|
134
133
|
log.debug { "got book #{book}" }
|
135
134
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
log.
|
144
|
-
else
|
145
|
-
# no constraint to match isbn, just return first result
|
135
|
+
return [book, cover_url] unless search_isbn
|
136
|
+
|
137
|
+
first_result = [book, cover_url] if first_result.nil?
|
138
|
+
|
139
|
+
search_isbn_canon = Library.canonicalise_ean(search_isbn)
|
140
|
+
rslt_isbn_canon = Library.canonicalise_ean(book.isbn)
|
141
|
+
if search_isbn_canon == rslt_isbn_canon
|
142
|
+
log.info { "book #{book} is a match" }
|
146
143
|
return [book, cover_url]
|
147
144
|
end
|
145
|
+
log.debug { "not a match, checking next" }
|
148
146
|
end
|
149
147
|
|
150
148
|
# gone through all and no ISBN match, so just return first result
|
@@ -152,7 +150,6 @@ module Alexandria
|
|
152
150
|
"no more results to check. Returning first result, just an approximation"
|
153
151
|
end
|
154
152
|
return first_result
|
155
|
-
|
156
153
|
end
|
157
154
|
|
158
155
|
title_header = doc % "h1.title"
|
@@ -174,15 +171,16 @@ module Alexandria
|
|
174
171
|
# can we do better? get the City name?? or multiple publishers?
|
175
172
|
bibdata = doc % "div#bibdata"
|
176
173
|
bibdata_table = bibdata % :table
|
177
|
-
|
174
|
+
publisher_header = bibdata_table % "th[text()*=Publisher]"
|
178
175
|
|
179
|
-
if
|
176
|
+
if publisher_header
|
177
|
+
publisher_row = publisher_header.parent
|
180
178
|
publication_info = (publisher_row / "td").last.inner_text
|
181
179
|
|
182
180
|
publication_info =~ if publication_info.index(";")
|
183
|
-
|
181
|
+
/;\s*([^\d]+)\s*\d*/
|
184
182
|
elsif publication_info.index(":")
|
185
|
-
|
183
|
+
/:\s*([^;:,]+)/
|
186
184
|
else
|
187
185
|
/([^;,]+)/
|
188
186
|
end
|
@@ -195,20 +193,18 @@ module Alexandria
|
|
195
193
|
year = nil
|
196
194
|
end
|
197
195
|
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
log.warn { "No ISBN found on page" }
|
206
|
-
end
|
196
|
+
isbn_row = doc % "tr#details-standardno"
|
197
|
+
if isbn_row
|
198
|
+
isbns = (isbn_row / "td").last.inner_text.split
|
199
|
+
isbn = Library.canonicalise_isbn(isbns.first)
|
200
|
+
else
|
201
|
+
log.warn { "No ISBN found on page" }
|
202
|
+
isbn = search_isbn
|
207
203
|
end
|
208
204
|
|
209
|
-
|
205
|
+
book_binding = "" # not given on WorldCat website (as far as I can tell)
|
210
206
|
|
211
|
-
book = Book.new(title, authors, isbn, publisher, year,
|
207
|
+
book = Book.new(title, authors, isbn, publisher, year, book_binding)
|
212
208
|
|
213
209
|
image_url = nil # hm, it's on the website, but uses JavaScript...
|
214
210
|
|
@@ -219,11 +215,11 @@ module Alexandria
|
|
219
215
|
trace = ex.backtrace.join("\n> ")
|
220
216
|
log.warn do
|
221
217
|
"Failed parsing search results for WorldCat " \
|
222
|
-
|
218
|
+
"#{ex.message} #{trace}"
|
223
219
|
end
|
224
220
|
raise NoResultsError
|
225
221
|
end
|
226
222
|
end
|
227
|
-
end
|
228
|
-
end
|
229
|
-
end
|
223
|
+
end
|
224
|
+
end
|
225
|
+
end
|