alexandria-book-collection-manager 0.7.5 → 0.7.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +9 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +51 -29
- data/.rubocop_todo.yml +33 -155
- data/.simplecov +5 -2
- data/.travis.yml +10 -4
- data/CHANGELOG.md +19 -0
- data/INSTALL.md +23 -11
- data/README.md +36 -35
- data/Rakefile +7 -5
- data/alexandria-book-collection-manager.gemspec +8 -3
- data/doc/dependency_decisions.yml +22 -3
- data/lib/alexandria.rb +2 -2
- data/lib/alexandria/book_providers.rb +47 -49
- data/lib/alexandria/book_providers/adlibris.rb +8 -13
- data/lib/alexandria/book_providers/amazon_aws.rb +47 -60
- data/lib/alexandria/book_providers/amazon_ecs_util.rb +283 -298
- data/lib/alexandria/book_providers/barnes_and_noble.rb +8 -8
- data/lib/alexandria/book_providers/douban.rb +2 -2
- data/lib/alexandria/book_providers/proxis.rb +12 -11
- data/lib/alexandria/book_providers/pseudomarc.rb +60 -70
- data/lib/alexandria/book_providers/siciliano.rb +5 -6
- data/lib/alexandria/book_providers/thalia.rb +8 -9
- data/lib/alexandria/book_providers/worldcat.rb +25 -31
- data/lib/alexandria/book_providers/z3950.rb +62 -69
- data/lib/alexandria/default_preferences.rb +37 -0
- data/lib/alexandria/execution_queue.rb +13 -12
- data/lib/alexandria/export_library.rb +4 -8
- data/lib/alexandria/import_library.rb +38 -62
- data/lib/alexandria/import_library_csv.rb +16 -16
- data/lib/alexandria/library_sort_order.rb +3 -1
- data/lib/alexandria/library_store.rb +16 -16
- data/lib/alexandria/logging.rb +4 -8
- data/lib/alexandria/models/book.rb +2 -2
- data/lib/alexandria/models/library.rb +18 -14
- data/lib/alexandria/net.rb +1 -2
- data/lib/alexandria/preferences.rb +27 -31
- data/lib/alexandria/scanners.rb +2 -2
- data/lib/alexandria/scanners/cue_cat.rb +5 -5
- data/lib/alexandria/scanners/keyboard.rb +1 -1
- data/lib/alexandria/smart_library.rb +22 -26
- data/lib/alexandria/ui.rb +7 -7
- data/lib/alexandria/ui/about_dialog.rb +1 -1
- data/lib/alexandria/ui/acquire_dialog.rb +9 -10
- data/lib/alexandria/ui/alert_dialog.rb +34 -19
- data/lib/alexandria/ui/bad_isbns_dialog.rb +13 -9
- data/lib/alexandria/ui/barcode_animation.rb +5 -5
- data/lib/alexandria/ui/book_properties_dialog.rb +2 -2
- data/lib/alexandria/ui/book_properties_dialog_base.rb +23 -17
- data/lib/alexandria/ui/callbacks.rb +141 -120
- data/lib/alexandria/ui/completion_models.rb +1 -1
- data/lib/alexandria/ui/confirm_erase_dialog.rb +1 -1
- data/lib/alexandria/ui/conflict_while_copying_dialog.rb +1 -1
- data/lib/alexandria/ui/error_dialog.rb +1 -1
- data/lib/alexandria/ui/export_dialog.rb +13 -15
- data/lib/alexandria/ui/icons.rb +34 -40
- data/lib/alexandria/ui/iconview_tooltips.rb +40 -53
- data/lib/alexandria/ui/import_dialog.rb +48 -47
- data/lib/alexandria/ui/init.rb +3 -2
- data/lib/alexandria/ui/keep_bad_isbn_dialog.rb +2 -2
- data/lib/alexandria/ui/libraries_combo.rb +10 -9
- data/lib/alexandria/ui/listview.rb +5 -6
- data/lib/alexandria/ui/main_app.rb +2 -2
- data/lib/alexandria/ui/multi_drag_treeview.rb +4 -6
- data/lib/alexandria/ui/new_book_dialog.rb +52 -52
- data/lib/alexandria/ui/new_provider_dialog.rb +12 -11
- data/lib/alexandria/ui/new_smart_library_dialog.rb +39 -27
- data/lib/alexandria/ui/preferences_dialog.rb +23 -82
- data/lib/alexandria/ui/provider_preferences_base_dialog.rb +9 -5
- data/lib/alexandria/ui/provider_preferences_dialog.rb +5 -5
- data/lib/alexandria/ui/really_delete_dialog.rb +1 -1
- data/lib/alexandria/ui/sidepane_manager.rb +35 -37
- data/lib/alexandria/ui/skip_entry_dialog.rb +3 -2
- data/lib/alexandria/ui/smart_library_properties_dialog.rb +35 -36
- data/lib/alexandria/ui/smart_library_properties_dialog_base.rb +59 -138
- data/lib/alexandria/ui/smart_library_rule_box.rb +119 -0
- data/lib/alexandria/ui/sound.rb +4 -6
- data/lib/alexandria/ui/ui_manager.rb +62 -64
- data/lib/alexandria/version.rb +2 -2
- data/lib/alexandria/web_themes.rb +15 -15
- data/po/cs.po +991 -874
- data/po/cy.po +957 -870
- data/po/de.po +991 -866
- data/po/el.po +987 -863
- data/po/es.po +986 -862
- data/po/fr.po +988 -868
- data/po/ga.po +910 -822
- data/po/gl.po +983 -863
- data/po/it.po +984 -862
- data/po/ja.po +969 -849
- data/po/mk.po +983 -859
- data/po/nb.po +982 -862
- data/po/nl.po +992 -869
- data/po/pl.po +1018 -963
- data/po/pt.po +983 -852
- data/po/pt_BR.po +983 -863
- data/po/ru.po +992 -869
- data/po/sk.po +986 -864
- data/po/sv.po +980 -860
- data/po/uk.po +975 -861
- data/po/zh_TW.po +974 -854
- data/share/alexandria/glade/main_app__builder.glade +6 -21
- data/share/gnome/help/alexandria/C/smart-libraries.xml +2 -2
- data/share/gnome/help/alexandria/C/working-with-libraries.xml +1 -1
- data/share/gnome/help/alexandria/fr/alexandria.xml +1 -1
- data/share/gnome/help/alexandria/ja/smart-libraries.xml +1 -1
- data/spec/alexandria/book_providers/world_cat_provider_spec.rb +160 -0
- data/spec/alexandria/book_providers_spec.rb +73 -129
- data/spec/alexandria/console_spec.rb +0 -5
- data/spec/alexandria/export_library_spec.rb +27 -38
- data/spec/alexandria/library_spec.rb +56 -44
- data/spec/alexandria/preferences_spec.rb +29 -3
- data/spec/alexandria/scanners/cue_cat_spec.rb +1 -1
- data/spec/alexandria/ui/about_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/acquire_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/alert_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/bad_isbns_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/book_properties_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/confirm_erase_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/conflict_while_copying_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/error_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/export_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/icons_spec.rb +26 -0
- data/spec/alexandria/ui/iconview_spec.rb +1 -1
- data/spec/alexandria/ui/import_dialog_spec.rb +30 -3
- data/spec/alexandria/ui/keep_bad_isbn_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/main_app_spec.rb +1 -1
- data/spec/alexandria/ui/new_book_dialog_manual_spec.rb +1 -1
- data/spec/alexandria/ui/new_provider_dialog_spec.rb +19 -3
- data/spec/alexandria/ui/new_smart_library_dialog_spec.rb +28 -3
- data/spec/alexandria/ui/preferences_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/provider_preferences_dialog_spec.rb +23 -8
- data/spec/alexandria/ui/really_delete_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/sidepane_manager_spec.rb +2 -2
- data/spec/alexandria/ui/skip_entry_dialog_spec.rb +1 -1
- data/spec/alexandria/ui/smart_library_properties_dialog_spec.rb +21 -7
- data/spec/alexandria/ui/ui_manager_spec.rb +39 -2
- data/spec/spec_helper.rb +46 -3
- data/tasks/spec.rake +3 -5
- data/util/rake/fileinstall.rb +12 -11
- metadata +82 -10
- data/spec/alexandria/ui/ui_utilities_spec.rb +0 -62
- data/spec/alexandria/utilities_spec.rb +0 -52
@@ -35,7 +35,7 @@ require "alexandria/book_providers/web"
|
|
35
35
|
module Alexandria
|
36
36
|
class BookProviders
|
37
37
|
class BarnesAndNobleProvider < WebsiteBasedProvider
|
38
|
-
include
|
38
|
+
include Logging
|
39
39
|
|
40
40
|
SITE = "http://www.barnesandnoble.com"
|
41
41
|
|
@@ -56,7 +56,7 @@ module Alexandria
|
|
56
56
|
end
|
57
57
|
|
58
58
|
def fetch_redirectly(uri_str, limit = 5)
|
59
|
-
raise NoResultsError, "HTTP redirect too deep" if limit.zero?
|
59
|
+
raise NoResultsError, _("HTTP redirect too deep") if limit.zero?
|
60
60
|
|
61
61
|
if limit < 10
|
62
62
|
sleep 0.1
|
@@ -67,8 +67,8 @@ module Alexandria
|
|
67
67
|
response = agent.get(uri_str)
|
68
68
|
log.debug { response.inspect }
|
69
69
|
case response
|
70
|
-
when Net::HTTPSuccess
|
71
|
-
when Net::HTTPRedirection
|
70
|
+
when Net::HTTPSuccess then response
|
71
|
+
when Net::HTTPRedirection
|
72
72
|
redirect = URI.parse response["Location"]
|
73
73
|
redirect = URI.parse(uri_str) + redirect if redirect.relative?
|
74
74
|
fetch_redirectly(redirect.to_s, (limit - 1))
|
@@ -79,7 +79,7 @@ module Alexandria
|
|
79
79
|
|
80
80
|
def search(criterion, type)
|
81
81
|
req = create_search_uri(type, criterion)
|
82
|
-
|
82
|
+
log.debug { "Requesting #{req}" }
|
83
83
|
html_data = fetch_redirectly(req)
|
84
84
|
|
85
85
|
if type == SEARCH_BY_ISBN
|
@@ -204,6 +204,6 @@ module Alexandria
|
|
204
204
|
raise NoResultsError
|
205
205
|
end
|
206
206
|
end
|
207
|
-
end
|
208
|
-
end
|
209
|
-
end
|
207
|
+
end
|
208
|
+
end
|
209
|
+
end
|
@@ -16,7 +16,7 @@ require "yaml"
|
|
16
16
|
module Alexandria
|
17
17
|
class BookProviders
|
18
18
|
class DoubanProvider < GenericProvider
|
19
|
-
include
|
19
|
+
include Logging
|
20
20
|
|
21
21
|
SITE = "http://www.douban.com"
|
22
22
|
BASE_URL = "http://api.douban.com/book/subjects?q=%s&max-results=5&alt=json"
|
@@ -58,7 +58,7 @@ module Alexandria
|
|
58
58
|
def json2yaml(json)
|
59
59
|
# insert spaces after : and , except within strings
|
60
60
|
# i.e. when followed by numeral, quote, { or [
|
61
|
-
yaml = json.gsub(/(
|
61
|
+
yaml = json.gsub(/(:|,)([0-9'"{\[])/) do |_match|
|
62
62
|
"#{Regexp.last_match[1]} #{Regexp.last_match[2]}"
|
63
63
|
end
|
64
64
|
yaml.gsub!(%r{\\/}, "/") # unescape forward slashes
|
@@ -14,7 +14,7 @@ module Alexandria
|
|
14
14
|
class BookProviders
|
15
15
|
class ProxisProvider < WebsiteBasedProvider
|
16
16
|
# include GetText
|
17
|
-
include
|
17
|
+
include Logging
|
18
18
|
# GetText.bindtextdomain(Alexandria::TEXTDOMAIN, :charset => "UTF-8")
|
19
19
|
|
20
20
|
# Proxis essentially has three book databases, NL, FR and EN.
|
@@ -37,7 +37,7 @@ module Alexandria
|
|
37
37
|
|
38
38
|
def search(criterion, type)
|
39
39
|
req = create_search_uri(type, criterion)
|
40
|
-
|
40
|
+
log.debug { req }
|
41
41
|
html_data = transport.get_response(URI.parse(req))
|
42
42
|
|
43
43
|
results = parse_search_result_data(html_data.body)
|
@@ -96,7 +96,7 @@ module Alexandria
|
|
96
96
|
if title_link
|
97
97
|
result[:title] = text_of(title_link)
|
98
98
|
result[:lookup_url] = title_link["href"]
|
99
|
-
unless
|
99
|
+
unless result[:lookup_url].start_with?("http")
|
100
100
|
result[:lookup_url] = "#{SITE}#{result[:lookup_url]}"
|
101
101
|
end
|
102
102
|
end
|
@@ -108,8 +108,8 @@ module Alexandria
|
|
108
108
|
book_search_results
|
109
109
|
end
|
110
110
|
|
111
|
-
def data_for_header(
|
112
|
-
tr =
|
111
|
+
def data_for_header(header)
|
112
|
+
tr = header.parent
|
113
113
|
td = tr.at("td")
|
114
114
|
text_of(td) if td
|
115
115
|
end
|
@@ -141,15 +141,16 @@ module Alexandria
|
|
141
141
|
unless info_headers.empty?
|
142
142
|
info_headers.each do |th|
|
143
143
|
header_text = th.inner_text
|
144
|
-
|
144
|
+
case header_text
|
145
|
+
when /Type/
|
145
146
|
book_data[:binding] = data_for_header(th)
|
146
|
-
|
147
|
+
when /Verschijningsdatum/
|
147
148
|
date = data_for_header(th)
|
148
|
-
date =~ %r{/(
|
149
|
+
date =~ %r{/(\d{4})}
|
149
150
|
book_data[:publish_year] = Regexp.last_match[1].to_i
|
150
|
-
|
151
|
+
when /Auteur/
|
151
152
|
book_data[:authors] << data_for_header(th)
|
152
|
-
|
153
|
+
when /Uitgever/
|
153
154
|
book_data[:publisher] = data_for_header(th)
|
154
155
|
end
|
155
156
|
end
|
@@ -157,7 +158,7 @@ module Alexandria
|
|
157
158
|
|
158
159
|
image_url = nil
|
159
160
|
if (cover_img = doc.at("img[@id$='imgProduct']"))
|
160
|
-
image_url = if
|
161
|
+
image_url = if cover_img["src"].start_with?("http")
|
161
162
|
cover_img["src"]
|
162
163
|
else
|
163
164
|
"#{SITE}/#{cover_img['src']}" # TODO: use html <base>
|
@@ -41,9 +41,9 @@ module Alexandria
|
|
41
41
|
notes: ["520", "a"]
|
42
42
|
}.freeze
|
43
43
|
|
44
|
-
def self.get_fields(data, type, stripping,
|
44
|
+
def self.get_fields(data, type, stripping, mappings = USMARC_MAPPINGS)
|
45
45
|
field = ""
|
46
|
-
|
46
|
+
mappings[type][1..mappings[type].length - 1].each do |part|
|
47
47
|
if data.first[part]
|
48
48
|
part_data = data.first[part].strip
|
49
49
|
if part_data =~ stripping
|
@@ -58,69 +58,68 @@ module Alexandria
|
|
58
58
|
field
|
59
59
|
end
|
60
60
|
|
61
|
-
def self.marc_text_to_book(marc,
|
61
|
+
def self.marc_text_to_book(marc, mappings = USMARC_MAPPINGS)
|
62
62
|
details = marc_text_to_details(marc)
|
63
|
-
|
64
|
-
title = nil
|
65
|
-
title_data = details[m[:title][0]]
|
66
|
-
if title_data
|
67
|
-
title_data_all = get_fields(title_data, :title, %r{(.*)[/:]$}, m)
|
68
|
-
title = title_data_all if title_data_all
|
69
|
-
end
|
63
|
+
return if details.empty?
|
70
64
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
author = Regexp.last_match[1] if author =~ /(.*),$/
|
78
|
-
authors << author
|
79
|
-
end
|
80
|
-
end
|
65
|
+
title = nil
|
66
|
+
title_data = details[mappings[:title][0]]
|
67
|
+
if title_data
|
68
|
+
title_data_all = get_fields(title_data, :title, %r{(.*)[/:]$}, mappings)
|
69
|
+
title = title_data_all if title_data_all
|
70
|
+
end
|
81
71
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
72
|
+
authors = []
|
73
|
+
author_data = details[mappings[:authors][0]]
|
74
|
+
author_data&.each do |ad|
|
75
|
+
author = ad[mappings[:authors][1]]
|
76
|
+
if author
|
77
|
+
author = author.strip
|
78
|
+
author = Regexp.last_match[1] if author =~ /(.*),$/
|
79
|
+
authors << author
|
87
80
|
end
|
81
|
+
end
|
88
82
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
83
|
+
isbn = nil
|
84
|
+
binding = nil
|
85
|
+
isbn_data = details[mappings[:isbn][0]]
|
86
|
+
if isbn_data && isbn_data.first[mappings[:isbn][1]] =~ /([-0-9xX]+)/
|
87
|
+
isbn = Regexp.last_match[1]
|
88
|
+
end
|
95
89
|
|
96
|
-
|
97
|
-
|
98
|
-
|
90
|
+
binding_data = details[mappings[:binding][0]]
|
91
|
+
if binding_data &&
|
92
|
+
binding_data.first[mappings[:binding][1]] =~ /([a-zA-Z][a-z\s]+[a-z])/
|
93
|
+
binding = Regexp.last_match[1]
|
94
|
+
end
|
99
95
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
year = publication_data.first[m[:year][1]]
|
104
|
-
year = Regexp.last_match[1].to_i if year =~ /(\d+)/
|
105
|
-
end
|
96
|
+
publisher = nil
|
97
|
+
publisher_data = details[mappings[:publisher][0]]
|
98
|
+
publisher = publisher_data.first[mappings[:publisher][1]] if publisher_data
|
106
99
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
100
|
+
year = nil
|
101
|
+
publication_data = details[mappings[:year][0]]
|
102
|
+
if publication_data
|
103
|
+
year = publication_data.first[mappings[:year][1]]
|
104
|
+
year = Regexp.last_match[1].to_i if year =~ /(\d+)/
|
105
|
+
end
|
113
106
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
107
|
+
notes = ""
|
108
|
+
notes_data = details[mappings[:notes][0]]
|
109
|
+
notes_data&.each do |note|
|
110
|
+
txt = note[mappings[:notes][1]]
|
111
|
+
notes += txt if txt
|
112
|
+
end
|
118
113
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
book
|
114
|
+
if title.nil? && isbn.nil?
|
115
|
+
# probably didn't undertand the MARC dialect
|
116
|
+
return nil
|
123
117
|
end
|
118
|
+
|
119
|
+
book = Alexandria::Book.new(title, authors, isbn,
|
120
|
+
publisher, year, binding)
|
121
|
+
book.notes = notes unless notes.empty?
|
122
|
+
book
|
124
123
|
end
|
125
124
|
|
126
125
|
def self.marc_text_to_details(marc)
|
@@ -132,31 +131,22 @@ module Alexandria
|
|
132
131
|
|
133
132
|
this_line_data = {}
|
134
133
|
|
135
|
-
# puts code
|
136
|
-
# puts data
|
137
134
|
d_idx = 0
|
138
135
|
while d_idx < data.size
|
139
136
|
d_str = data[d_idx..-1]
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
# puts " " + $2
|
148
|
-
# puts idx
|
149
|
-
d_idx += idx + 2 # (2 extra to push beyond this '$a' etc.)
|
150
|
-
else
|
151
|
-
break
|
152
|
-
end
|
137
|
+
idx = d_str =~ /\$([a-z]) ([^$]+)/
|
138
|
+
break unless idx
|
139
|
+
|
140
|
+
sub_code = Regexp.last_match[1]
|
141
|
+
sub_data = Regexp.last_match[2]
|
142
|
+
this_line_data[sub_code] = sub_data
|
143
|
+
d_idx += idx + 2 # (2 extra to push beyond this '$a' etc.)
|
153
144
|
end
|
154
145
|
|
155
146
|
unless this_line_data.empty?
|
156
147
|
details[code] = [] unless details.key?(code)
|
157
148
|
details[code] << this_line_data
|
158
149
|
end
|
159
|
-
|
160
150
|
end
|
161
151
|
end
|
162
152
|
details
|
@@ -75,9 +75,9 @@ module Alexandria
|
|
75
75
|
if (type == SEARCH_BY_ISBN) && (trying_again == false)
|
76
76
|
trying_again = true
|
77
77
|
retry
|
78
|
-
else
|
79
|
-
raise ex
|
80
78
|
end
|
79
|
+
|
80
|
+
raise ex
|
81
81
|
end
|
82
82
|
end
|
83
83
|
|
@@ -143,7 +143,7 @@ module Alexandria
|
|
143
143
|
result[:title] = link.inner_text.strip
|
144
144
|
link_to_description = link["href"]
|
145
145
|
slash = ""
|
146
|
-
slash = "/" unless
|
146
|
+
slash = "/" unless link_to_description.start_with?("/")
|
147
147
|
result[:url] = "#{SITE}#{slash}#{link_to_description}"
|
148
148
|
|
149
149
|
book_search_results << result
|
@@ -194,15 +194,14 @@ module Alexandria
|
|
194
194
|
|
195
195
|
script.children.each do |ch|
|
196
196
|
ch_text = ch.to_s
|
197
|
-
if ch_text =~ /ImgSrc\[
|
197
|
+
if ch_text =~ /ImgSrc\[\d\]="(.+)";/
|
198
198
|
img_link = Regexp.last_match[1]
|
199
199
|
image_urls << img_link
|
200
200
|
end
|
201
201
|
end
|
202
202
|
end
|
203
203
|
book = Book.new(title, authors, isbn, publisher, publish_year, binding)
|
204
|
-
|
205
|
-
result
|
204
|
+
[book, image_urls.first]
|
206
205
|
rescue StandardError => ex
|
207
206
|
trace = ex.backtrace.join("\n> ")
|
208
207
|
log.error { "Failed parsing Siciliano product page #{ex.message}\n#{trace}" }
|
@@ -31,7 +31,7 @@ require "alexandria/book_providers/web"
|
|
31
31
|
module Alexandria
|
32
32
|
class BookProviders
|
33
33
|
class ThaliaProvider < WebsiteBasedProvider
|
34
|
-
include
|
34
|
+
include Logging
|
35
35
|
|
36
36
|
SITE = "http://www.thalia.de"
|
37
37
|
BASE_SEARCH_URL = "#{SITE}/shop/bde_bu_hg_startseite/suche/?%s=%s" # type,term
|
@@ -48,7 +48,7 @@ module Alexandria
|
|
48
48
|
|
49
49
|
def search(criterion, type)
|
50
50
|
req = create_search_uri(type, criterion)
|
51
|
-
|
51
|
+
log.debug { req }
|
52
52
|
html_data = transport.get_response(URI.parse(req))
|
53
53
|
if type == SEARCH_BY_ISBN
|
54
54
|
parse_result_data(html_data.body, criterion)
|
@@ -130,11 +130,10 @@ module Alexandria
|
|
130
130
|
# e.g. .../dave_thomas/ISBN0-9745140-5-5/ID6017044.html
|
131
131
|
chosen = results.first # fallback!
|
132
132
|
results.each do |rslt|
|
133
|
-
if rslt[:lookup_url] =~ %r{/ISBN(\d+[\d-]*)/}
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
end
|
133
|
+
if rslt[:lookup_url] =~ %r{/ISBN(\d+[\d-]*)/} &&
|
134
|
+
(Regexp.last_match[1].delete("-") == isbn10)
|
135
|
+
chosen = rslt
|
136
|
+
break
|
138
137
|
end
|
139
138
|
end
|
140
139
|
html_data = transport.get_response(URI.parse(chosen[:lookup_url]))
|
@@ -152,7 +151,7 @@ module Alexandria
|
|
152
151
|
authors = []
|
153
152
|
author_links = author_h.parent / :a
|
154
153
|
author_links.each do |a|
|
155
|
-
if a["href"]
|
154
|
+
if a["href"].include? "BUCH/sa"
|
156
155
|
# 'sa' means search author, there may also be 'ssw' (search keyword) links
|
157
156
|
authors << a.inner_text[0..-2].strip
|
158
157
|
# NOTE stripping the little >> character here...
|
@@ -167,7 +166,7 @@ module Alexandria
|
|
167
166
|
|
168
167
|
year = nil
|
169
168
|
date = data_from_label(item_details, "Erschienen:")
|
170
|
-
year = Regexp.last_match[1].to_i if date =~ /(
|
169
|
+
year = Regexp.last_match[1].to_i if date =~ /(\d{4})/
|
171
170
|
|
172
171
|
binding = data_from_label(item_details, "Einband")
|
173
172
|
|
@@ -22,7 +22,7 @@ require "alexandria/book_providers/web"
|
|
22
22
|
module Alexandria
|
23
23
|
class BookProviders
|
24
24
|
class WorldCatProvider < WebsiteBasedProvider
|
25
|
-
include
|
25
|
+
include Logging
|
26
26
|
|
27
27
|
SITE = "https://www.worldcat.org"
|
28
28
|
BASE_SEARCH_URL = "#{SITE}/search?q=%s%s&qt=advanced" # type, term
|
@@ -77,10 +77,9 @@ module Alexandria
|
|
77
77
|
book_search_results = []
|
78
78
|
begin
|
79
79
|
result_cells = doc / "td.result/div.name/.."
|
80
|
-
# puts result_cells.length
|
81
80
|
result_cells.each do |td|
|
82
81
|
type_icon = (td % "div.type/img.icn")
|
83
|
-
next unless type_icon && type_icon["src"]
|
82
|
+
next unless type_icon && type_icon["src"].include?("icon-bks")
|
84
83
|
|
85
84
|
name_div = td % "div.name"
|
86
85
|
title = name_div.inner_text
|
@@ -129,22 +128,20 @@ module Alexandria
|
|
129
128
|
html2 = rslt2.body
|
130
129
|
|
131
130
|
book, cover_url = parse_result_data(html2, search_isbn, true)
|
132
|
-
first_result = [book, cover_url] if first_result.nil?
|
133
131
|
|
134
132
|
log.debug { "got book #{book}" }
|
135
133
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
log.
|
144
|
-
else
|
145
|
-
# no constraint to match isbn, just return first result
|
134
|
+
return [book, cover_url] unless search_isbn
|
135
|
+
|
136
|
+
first_result = [book, cover_url] if first_result.nil?
|
137
|
+
|
138
|
+
search_isbn_canon = Library.canonicalise_ean(search_isbn)
|
139
|
+
rslt_isbn_canon = Library.canonicalise_ean(book.isbn)
|
140
|
+
if search_isbn_canon == rslt_isbn_canon
|
141
|
+
log.info { "book #{book} is a match" }
|
146
142
|
return [book, cover_url]
|
147
143
|
end
|
144
|
+
log.debug { "not a match, checking next" }
|
148
145
|
end
|
149
146
|
|
150
147
|
# gone through all and no ISBN match, so just return first result
|
@@ -152,7 +149,6 @@ module Alexandria
|
|
152
149
|
"no more results to check. Returning first result, just an approximation"
|
153
150
|
end
|
154
151
|
return first_result
|
155
|
-
|
156
152
|
end
|
157
153
|
|
158
154
|
title_header = doc % "h1.title"
|
@@ -180,9 +176,9 @@ module Alexandria
|
|
180
176
|
publication_info = (publisher_row / "td").last.inner_text
|
181
177
|
|
182
178
|
publication_info =~ if publication_info.index(";")
|
183
|
-
|
179
|
+
/;\s*([^\d]+)\s*\d*/
|
184
180
|
elsif publication_info.index(":")
|
185
|
-
|
181
|
+
/:\s*([^;:,]+)/
|
186
182
|
else
|
187
183
|
/([^;,]+)/
|
188
184
|
end
|
@@ -195,20 +191,18 @@ module Alexandria
|
|
195
191
|
year = nil
|
196
192
|
end
|
197
193
|
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
log.warn { "No ISBN found on page" }
|
206
|
-
end
|
194
|
+
isbn_row = doc % "tr#details-standardno"
|
195
|
+
if isbn_row
|
196
|
+
isbns = (isbn_row / "td").last.inner_text.split
|
197
|
+
isbn = Library.canonicalise_isbn(isbns.first)
|
198
|
+
else
|
199
|
+
log.warn { "No ISBN found on page" }
|
200
|
+
isbn = search_isbn
|
207
201
|
end
|
208
202
|
|
209
|
-
|
203
|
+
book_binding = "" # not given on WorldCat website (as far as I can tell)
|
210
204
|
|
211
|
-
book = Book.new(title, authors, isbn, publisher, year,
|
205
|
+
book = Book.new(title, authors, isbn, publisher, year, book_binding)
|
212
206
|
|
213
207
|
image_url = nil # hm, it's on the website, but uses JavaScript...
|
214
208
|
|
@@ -224,6 +218,6 @@ module Alexandria
|
|
224
218
|
raise NoResultsError
|
225
219
|
end
|
226
220
|
end
|
227
|
-
end
|
228
|
-
end
|
229
|
-
end
|
221
|
+
end
|
222
|
+
end
|
223
|
+
end
|