alexandria-book-collection-manager 0.7.2 → 0.7.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/dependabot.yml +9 -0
- data/.github/workflows/ruby.yml +77 -0
- data/.gitignore +5 -1
- data/.hound.yml +2 -0
- data/.rubocop.yml +87 -37
- data/.rubocop_todo.yml +62 -191
- data/.simplecov +5 -2
- data/CHANGELOG.md +63 -0
- data/Gemfile +4 -3
- data/INSTALL.md +26 -14
- data/README.md +52 -42
- data/Rakefile +93 -109
- data/TODO.md +9 -1
- data/alexandria-book-collection-manager.gemspec +50 -43
- data/bin/alexandria +30 -53
- data/doc/FAQ +2 -6
- data/doc/dependency_decisions.yml +27 -8
- data/lib/alexandria.rb +27 -37
- data/lib/alexandria/about.rb +50 -50
- data/lib/alexandria/book_providers.rb +90 -97
- data/lib/alexandria/book_providers/adlibris.rb +41 -76
- data/lib/alexandria/book_providers/amazon_aws.rb +96 -100
- data/lib/alexandria/book_providers/amazon_ecs_util.rb +295 -322
- data/lib/alexandria/book_providers/barnes_and_noble.rb +48 -45
- data/lib/alexandria/book_providers/douban.rb +26 -42
- data/lib/alexandria/book_providers/proxis.rb +44 -55
- data/lib/alexandria/book_providers/pseudomarc.rb +77 -85
- data/lib/alexandria/book_providers/siciliano.rb +64 -65
- data/lib/alexandria/book_providers/thalia.rb +42 -41
- data/lib/alexandria/book_providers/web.rb +15 -33
- data/lib/alexandria/book_providers/worldcat.rb +70 -97
- data/lib/alexandria/book_providers/z3950.rb +160 -173
- data/lib/alexandria/config.rb +1 -1
- data/lib/alexandria/console.rb +8 -21
- data/lib/alexandria/default_preferences.rb +37 -0
- data/lib/alexandria/execution_queue.rb +15 -13
- data/lib/alexandria/export_format.rb +47 -0
- data/lib/alexandria/export_library.rb +193 -300
- data/lib/alexandria/import_library.rb +108 -141
- data/lib/alexandria/import_library_csv.rb +43 -46
- data/lib/alexandria/library_collection.rb +79 -0
- data/lib/alexandria/library_sort_order.rb +45 -0
- data/lib/alexandria/library_store.rb +233 -0
- data/lib/alexandria/logging.rb +11 -13
- data/lib/alexandria/models/book.rb +13 -20
- data/lib/alexandria/models/library.rb +81 -353
- data/lib/alexandria/net.rb +5 -6
- data/lib/alexandria/preferences.rb +73 -87
- data/lib/alexandria/scanners.rb +2 -2
- data/lib/alexandria/scanners/{cuecat.rb → cue_cat.rb} +20 -18
- data/lib/alexandria/scanners/keyboard.rb +8 -8
- data/lib/alexandria/smart_library.rb +133 -170
- data/lib/alexandria/ui.rb +15 -15
- data/lib/alexandria/ui/about_dialog.rb +49 -0
- data/lib/alexandria/ui/{dialogs/acquire_dialog.rb → acquire_dialog.rb} +119 -136
- data/lib/alexandria/ui/alert_dialog.rb +64 -0
- data/lib/alexandria/ui/bad_isbns_dialog.rb +41 -0
- data/lib/alexandria/ui/{dialogs/barcode_animation.rb → barcode_animation.rb} +16 -15
- data/lib/alexandria/ui/{dialogs/book_properties_dialog.rb → book_properties_dialog.rb} +39 -52
- data/lib/alexandria/ui/book_properties_dialog_base.rb +318 -0
- data/lib/alexandria/ui/builder_base.rb +7 -27
- data/lib/alexandria/ui/calendar_popup.rb +58 -0
- data/lib/alexandria/ui/callbacks.rb +189 -183
- data/lib/alexandria/ui/completion_models.rb +10 -23
- data/lib/alexandria/ui/confirm_erase_dialog.rb +33 -0
- data/lib/alexandria/ui/conflict_while_copying_dialog.rb +34 -0
- data/lib/alexandria/ui/dndable.rb +7 -7
- data/lib/alexandria/ui/error_dialog.rb +25 -0
- data/lib/alexandria/ui/export_dialog.rb +142 -0
- data/lib/alexandria/ui/icons.rb +47 -63
- data/lib/alexandria/ui/iconview.rb +12 -10
- data/lib/alexandria/ui/iconview_tooltips.rb +41 -54
- data/lib/alexandria/ui/import_dialog.rb +157 -0
- data/lib/alexandria/ui/init.rb +21 -33
- data/lib/alexandria/ui/keep_bad_isbn_dialog.rb +36 -0
- data/lib/alexandria/ui/libraries_combo.rb +16 -14
- data/lib/alexandria/ui/listview.rb +73 -87
- data/lib/alexandria/ui/main_app.rb +24 -26
- data/lib/alexandria/ui/misc_dialogs.rb +10 -0
- data/lib/alexandria/ui/multi_drag_treeview.rb +28 -41
- data/lib/alexandria/ui/{dialogs/new_book_dialog.rb → new_book_dialog.rb} +156 -194
- data/lib/alexandria/ui/new_book_dialog_manual.rb +139 -0
- data/lib/alexandria/ui/new_provider_dialog.rb +100 -0
- data/lib/alexandria/ui/new_smart_library_dialog.rb +74 -0
- data/lib/alexandria/ui/preferences_dialog.rb +313 -0
- data/lib/alexandria/ui/provider_preferences_base_dialog.rb +95 -0
- data/lib/alexandria/ui/provider_preferences_dialog.rb +35 -0
- data/lib/alexandria/ui/really_delete_dialog.rb +53 -0
- data/lib/alexandria/ui/{sidepane.rb → sidepane_manager.rb} +56 -68
- data/lib/alexandria/ui/skip_entry_dialog.rb +33 -0
- data/lib/alexandria/ui/smart_library_properties_dialog.rb +60 -0
- data/lib/alexandria/ui/smart_library_properties_dialog_base.rb +242 -0
- data/lib/alexandria/ui/smart_library_rule_box.rb +119 -0
- data/lib/alexandria/ui/sound.rb +11 -13
- data/lib/alexandria/ui/ui_manager.rb +236 -251
- data/lib/alexandria/undo_manager.rb +1 -0
- data/lib/alexandria/version.rb +4 -19
- data/lib/alexandria/web_themes.rb +22 -21
- data/po/Makefile +2 -2
- data/po/cs.po +993 -880
- data/po/cy.po +957 -874
- data/po/de.po +990 -869
- data/po/el.po +989 -869
- data/po/es.po +985 -865
- data/po/fr.po +986 -870
- data/po/ga.po +907 -823
- data/po/gl.po +981 -865
- data/po/it.po +986 -868
- data/po/ja.po +969 -853
- data/po/mk.po +983 -863
- data/po/nb.po +979 -863
- data/po/nl.po +983 -864
- data/po/pl.po +1017 -974
- data/po/pt.po +988 -861
- data/po/pt_BR.po +984 -868
- data/po/ru.po +992 -873
- data/po/sk.po +987 -869
- data/po/sv.po +977 -861
- data/po/uk.po +975 -865
- data/po/zh_TW.po +976 -860
- data/schemas/alexandria.schemas +25 -3
- data/share/alexandria/glade/acquire_dialog__builder.glade +15 -12
- data/share/alexandria/glade/book_properties_dialog__builder.glade +171 -299
- data/share/alexandria/glade/main_app__builder.glade +24 -33
- data/share/alexandria/glade/new_book_dialog__builder.glade +27 -59
- data/share/alexandria/glade/preferences_dialog__builder.glade +250 -290
- data/share/gnome/help/alexandria/C/introduction.xml +0 -8
- data/share/gnome/help/alexandria/C/searching.xml +1 -1
- data/share/gnome/help/alexandria/C/smart-libraries.xml +2 -2
- data/share/gnome/help/alexandria/C/working-with-libraries.xml +1 -1
- data/share/gnome/help/alexandria/fr/alexandria.xml +1 -1
- data/share/gnome/help/alexandria/ja/introduction.xml +0 -8
- data/share/gnome/help/alexandria/ja/smart-libraries.xml +1 -1
- data/spec/alexandria/book_providers/world_cat_provider_spec.rb +160 -0
- data/spec/alexandria/book_providers_spec.rb +75 -171
- data/spec/alexandria/book_spec.rb +12 -10
- data/spec/alexandria/console_spec.rb +27 -0
- data/spec/alexandria/export_library_spec.rb +130 -0
- data/spec/alexandria/library_spec.rb +128 -172
- data/spec/alexandria/library_store_spec.rb +37 -0
- data/spec/alexandria/preferences_spec.rb +44 -17
- data/spec/alexandria/scanners/cue_cat_spec.rb +52 -0
- data/spec/alexandria/smart_library_spec.rb +30 -25
- data/spec/alexandria/ui/about_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/acquire_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/alert_dialog_spec.rb +16 -0
- data/spec/alexandria/ui/bad_isbns_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/book_properties_dialog_spec.rb +17 -0
- data/spec/alexandria/ui/confirm_erase_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/conflict_while_copying_dialog_spec.rb +16 -0
- data/spec/alexandria/ui/error_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/export_dialog_spec.rb +36 -0
- data/spec/alexandria/ui/icons_spec.rb +26 -0
- data/spec/alexandria/ui/iconview_spec.rb +7 -21
- data/spec/alexandria/ui/import_dialog_spec.rb +46 -0
- data/spec/alexandria/ui/keep_bad_isbn_dialog_spec.rb +17 -0
- data/spec/alexandria/ui/main_app_spec.rb +7 -34
- data/spec/alexandria/ui/new_book_dialog_manual_spec.rb +15 -0
- data/spec/alexandria/ui/new_book_dialog_spec.rb +22 -0
- data/spec/alexandria/ui/new_provider_dialog_spec.rb +30 -0
- data/spec/alexandria/ui/new_smart_library_dialog_spec.rb +39 -0
- data/spec/alexandria/ui/preferences_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/provider_preferences_dialog_spec.rb +34 -0
- data/spec/alexandria/ui/really_delete_dialog_spec.rb +16 -0
- data/spec/alexandria/ui/sidepane_manager_spec.rb +15 -0
- data/spec/alexandria/ui/skip_entry_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/smart_library_properties_dialog_spec.rb +49 -0
- data/spec/alexandria/ui/sound_spec.rb +2 -2
- data/spec/alexandria/ui/ui_manager_spec.rb +43 -20
- data/spec/end_to_end/basic_run_spec.rb +52 -0
- data/spec/spec_helper.rb +65 -33
- data/tasks/setup.rb +2 -2
- data/tasks/spec.rake +16 -3
- data/util/rake/fileinstall.rb +39 -35
- data/util/rake/gettextgenerate.rb +7 -7
- data/util/rake/omfgenerate.rb +7 -7
- metadata +178 -45
- data/dogtail/basic_run_test.py +0 -9
- data/lib/alexandria/book_providers/deastore.rb +0 -265
- data/lib/alexandria/book_providers/mcu.rb +0 -182
- data/lib/alexandria/book_providers/renaud.rb +0 -149
- data/lib/alexandria/ui/dialogs/about_dialog.rb +0 -61
- data/lib/alexandria/ui/dialogs/alert_dialog.rb +0 -72
- data/lib/alexandria/ui/dialogs/bad_isbns_dialog.rb +0 -51
- data/lib/alexandria/ui/dialogs/book_properties_dialog_base.rb +0 -426
- data/lib/alexandria/ui/dialogs/export_dialog.rb +0 -171
- data/lib/alexandria/ui/dialogs/import_dialog.rb +0 -196
- data/lib/alexandria/ui/dialogs/misc_dialogs.rb +0 -87
- data/lib/alexandria/ui/dialogs/new_book_dialog_manual.rb +0 -154
- data/lib/alexandria/ui/dialogs/new_smart_library_dialog.rb +0 -74
- data/lib/alexandria/ui/dialogs/preferences_dialog.rb +0 -568
- data/lib/alexandria/ui/dialogs/smart_library_properties_dialog.rb +0 -59
- data/lib/alexandria/ui/dialogs/smart_library_properties_dialog_base.rb +0 -420
- data/spec/alexandria/scanners/cuecat_spec.rb +0 -67
- data/spec/alexandria/ui/dialogs_spec.rb +0 -96
- data/spec/alexandria/ui/sidepane_spec.rb +0 -29
- data/spec/alexandria/ui/ui_utilities_spec.rb +0 -62
- data/spec/alexandria/utilities_spec.rb +0 -52
- data/tasks/dogtail.rake +0 -6
@@ -22,103 +22,104 @@ module Alexandria
|
|
22
22
|
# A really simple regex-based parser to grab data out of marc text records.
|
23
23
|
class PseudoMarcParser
|
24
24
|
BNF_FR_MAPPINGS = {
|
25
|
-
title: [
|
26
|
-
authors: [
|
27
|
-
isbn: [
|
28
|
-
publisher: [
|
29
|
-
year: [
|
30
|
-
binding: [
|
31
|
-
notes: [
|
25
|
+
title: ["200", "a"],
|
26
|
+
authors: ["700", "a"],
|
27
|
+
isbn: ["010", "a"],
|
28
|
+
publisher: ["210", "g"],
|
29
|
+
year: ["210", "d"],
|
30
|
+
binding: ["225", "a"],
|
31
|
+
notes: ["520", "a"]
|
32
32
|
}.freeze
|
33
33
|
|
34
34
|
USMARC_MAPPINGS = {
|
35
|
-
title: [
|
36
|
-
authors: [
|
37
|
-
isbn: [
|
38
|
-
publisher: [
|
39
|
-
year: [
|
40
|
-
binding: [
|
41
|
-
notes: [
|
35
|
+
title: ["245", "a", "b"],
|
36
|
+
authors: ["100", "a"],
|
37
|
+
isbn: ["020", "a"],
|
38
|
+
publisher: ["490", "a"],
|
39
|
+
year: ["260", "c"],
|
40
|
+
binding: ["020", "a"], # listed with isbn here
|
41
|
+
notes: ["520", "a"]
|
42
42
|
}.freeze
|
43
43
|
|
44
|
-
def self.get_fields(data, type, stripping,
|
45
|
-
field =
|
46
|
-
|
44
|
+
def self.get_fields(data, type, stripping, mappings = USMARC_MAPPINGS)
|
45
|
+
field = ""
|
46
|
+
mappings[type][1..mappings[type].length - 1].each do |part|
|
47
47
|
if data.first[part]
|
48
48
|
part_data = data.first[part].strip
|
49
49
|
if part_data =~ stripping
|
50
50
|
part_data = Regexp.last_match[1]
|
51
51
|
part_data = part_data.strip
|
52
52
|
end
|
53
|
-
field +=
|
53
|
+
field += ": " if field != ""
|
54
54
|
field += part_data
|
55
55
|
end
|
56
56
|
end
|
57
|
-
field = nil if field ==
|
57
|
+
field = nil if field == ""
|
58
58
|
field
|
59
59
|
end
|
60
60
|
|
61
|
-
def self.marc_text_to_book(marc,
|
61
|
+
def self.marc_text_to_book(marc, mappings = USMARC_MAPPINGS)
|
62
62
|
details = marc_text_to_details(marc)
|
63
|
-
|
64
|
-
title = nil
|
65
|
-
title_data = details[m[:title][0]]
|
66
|
-
if title_data
|
67
|
-
title_data_all = get_fields(title_data, :title, /(.*)[\/:]$/, m)
|
68
|
-
title = title_data_all if title_data_all
|
69
|
-
end
|
63
|
+
return if details.empty?
|
70
64
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
author = Regexp.last_match[1] if author =~ /(.*),$/
|
78
|
-
authors << author
|
79
|
-
end
|
80
|
-
end
|
65
|
+
title = nil
|
66
|
+
title_data = details[mappings[:title][0]]
|
67
|
+
if title_data
|
68
|
+
title_data_all = get_fields(title_data, :title, %r{(.*)[/:]$}, mappings)
|
69
|
+
title = title_data_all if title_data_all
|
70
|
+
end
|
81
71
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
72
|
+
authors = []
|
73
|
+
author_data = details[mappings[:authors][0]]
|
74
|
+
author_data&.each do |ad|
|
75
|
+
author = ad[mappings[:authors][1]]
|
76
|
+
if author
|
77
|
+
author = author.strip
|
78
|
+
author = Regexp.last_match[1] if author =~ /(.*),$/
|
79
|
+
authors << author
|
87
80
|
end
|
81
|
+
end
|
88
82
|
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
83
|
+
isbn = nil
|
84
|
+
binding = nil
|
85
|
+
isbn_data = details[mappings[:isbn][0]]
|
86
|
+
if isbn_data && isbn_data.first[mappings[:isbn][1]] =~ /([-0-9xX]+)/
|
87
|
+
isbn = Regexp.last_match[1]
|
88
|
+
end
|
93
89
|
|
94
|
-
|
95
|
-
|
96
|
-
|
90
|
+
binding_data = details[mappings[:binding][0]]
|
91
|
+
if binding_data &&
|
92
|
+
binding_data.first[mappings[:binding][1]] =~ /([a-zA-Z][a-z\s]+[a-z])/
|
93
|
+
binding = Regexp.last_match[1]
|
94
|
+
end
|
97
95
|
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
year = publication_data.first[m[:year][1]]
|
102
|
-
year = Regexp.last_match[1].to_i if year =~ /(\d+)/
|
103
|
-
end
|
96
|
+
publisher = nil
|
97
|
+
publisher_data = details[mappings[:publisher][0]]
|
98
|
+
publisher = publisher_data.first[mappings[:publisher][1]] if publisher_data
|
104
99
|
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
100
|
+
year = nil
|
101
|
+
publication_data = details[mappings[:year][0]]
|
102
|
+
if publication_data
|
103
|
+
year = publication_data.first[mappings[:year][1]]
|
104
|
+
year = Regexp.last_match[1].to_i if year =~ /(\d+)/
|
105
|
+
end
|
111
106
|
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
107
|
+
notes = ""
|
108
|
+
notes_data = details[mappings[:notes][0]]
|
109
|
+
notes_data&.each do |note|
|
110
|
+
txt = note[mappings[:notes][1]]
|
111
|
+
notes += txt if txt
|
112
|
+
end
|
116
113
|
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
book
|
114
|
+
if title.nil? && isbn.nil?
|
115
|
+
# probably didn't undertand the MARC dialect
|
116
|
+
return nil
|
121
117
|
end
|
118
|
+
|
119
|
+
book = Alexandria::Book.new(title, authors, isbn,
|
120
|
+
publisher, year, binding)
|
121
|
+
book.notes = notes unless notes.empty?
|
122
|
+
book
|
122
123
|
end
|
123
124
|
|
124
125
|
def self.marc_text_to_details(marc)
|
@@ -130,31 +131,22 @@ module Alexandria
|
|
130
131
|
|
131
132
|
this_line_data = {}
|
132
133
|
|
133
|
-
# puts code
|
134
|
-
# puts data
|
135
134
|
d_idx = 0
|
136
135
|
while d_idx < data.size
|
137
136
|
d_str = data[d_idx..-1]
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
# puts " " + $2
|
146
|
-
# puts idx
|
147
|
-
d_idx += idx + 2 # (2 extra to push beyond this '$a' etc.)
|
148
|
-
else
|
149
|
-
break
|
150
|
-
end
|
137
|
+
idx = d_str =~ /\$([a-z]) ([^$]+)/
|
138
|
+
break unless idx
|
139
|
+
|
140
|
+
sub_code = Regexp.last_match[1]
|
141
|
+
sub_data = Regexp.last_match[2]
|
142
|
+
this_line_data[sub_code] = sub_data
|
143
|
+
d_idx += idx + 2 # (2 extra to push beyond this '$a' etc.)
|
151
144
|
end
|
152
145
|
|
153
146
|
unless this_line_data.empty?
|
154
147
|
details[code] = [] unless details.key?(code)
|
155
148
|
details[code] << this_line_data
|
156
149
|
end
|
157
|
-
|
158
150
|
end
|
159
151
|
end
|
160
152
|
details
|
@@ -25,27 +25,27 @@
|
|
25
25
|
# Almost completely rewritten by Cathal Mc Ginley (21 Feb 2009)
|
26
26
|
# based on the new code for Palatina
|
27
27
|
|
28
|
-
require
|
29
|
-
require
|
30
|
-
require
|
28
|
+
require "net/http"
|
29
|
+
require "cgi"
|
30
|
+
require "alexandria/book_providers/web"
|
31
31
|
|
32
32
|
module Alexandria
|
33
33
|
class BookProviders
|
34
34
|
class SicilianoProvider < WebsiteBasedProvider
|
35
35
|
include Logging
|
36
36
|
|
37
|
-
SITE =
|
37
|
+
SITE = "http://www.siciliano.com.br"
|
38
38
|
|
39
39
|
# The string interpolations in this URL are the search term and search
|
40
40
|
# type, respectively.
|
41
41
|
BASE_SEARCH_URL = "#{SITE}/pesquisaweb/pesquisaweb.dll/pesquisa?" \
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
42
|
+
"&FIL_ID=102" \
|
43
|
+
"&PALAVRASN1=%s" \
|
44
|
+
"&FILTRON1=%s" \
|
45
|
+
"&ESTRUTN1=0301&ORDEMN2=E"
|
46
46
|
|
47
47
|
def initialize
|
48
|
-
super(
|
48
|
+
super("Siciliano", "Livraria Siciliano (Brasil)")
|
49
49
|
# no preferences for the moment
|
50
50
|
prefs.read
|
51
51
|
end
|
@@ -57,7 +57,7 @@ module Alexandria
|
|
57
57
|
end
|
58
58
|
|
59
59
|
def search(criterion, type)
|
60
|
-
criterion = criterion.encode(
|
60
|
+
criterion = criterion.encode("ISO-8859-1") # still needed??
|
61
61
|
trying_again = false
|
62
62
|
begin
|
63
63
|
req = create_search_uri(type, criterion, trying_again)
|
@@ -71,13 +71,13 @@ module Alexandria
|
|
71
71
|
else
|
72
72
|
results.map { |result| get_book_from_search_result(result) }
|
73
73
|
end
|
74
|
-
rescue NoResultsError =>
|
74
|
+
rescue NoResultsError => ex
|
75
75
|
if (type == SEARCH_BY_ISBN) && (trying_again == false)
|
76
76
|
trying_again = true
|
77
77
|
retry
|
78
|
-
else
|
79
|
-
raise err
|
80
78
|
end
|
79
|
+
|
80
|
+
raise ex
|
81
81
|
end
|
82
82
|
end
|
83
83
|
|
@@ -90,10 +90,10 @@ module Alexandria
|
|
90
90
|
private
|
91
91
|
|
92
92
|
def create_search_uri(search_type, search_term, trying_again = false)
|
93
|
-
(search_type_code = { SEARCH_BY_ISBN
|
94
|
-
SEARCH_BY_TITLE
|
95
|
-
SEARCH_BY_AUTHORS =>
|
96
|
-
SEARCH_BY_KEYWORD =>
|
93
|
+
(search_type_code = { SEARCH_BY_ISBN => "G",
|
94
|
+
SEARCH_BY_TITLE => "A",
|
95
|
+
SEARCH_BY_AUTHORS => "B",
|
96
|
+
SEARCH_BY_KEYWORD => "X" }[search_type]) || "X"
|
97
97
|
search_term_encoded = if search_type == SEARCH_BY_ISBN
|
98
98
|
if trying_again
|
99
99
|
# on second attempt, try ISBN-10...
|
@@ -123,35 +123,33 @@ module Alexandria
|
|
123
123
|
book_search_results = []
|
124
124
|
# each result will be a dict with keys :title, :author, :publisher, :url
|
125
125
|
|
126
|
-
list_items = doc.search(
|
126
|
+
list_items = doc.search("div.pesquisa-item-lista-conteudo")
|
127
127
|
list_items.each do |item|
|
128
|
-
|
129
|
-
result = {}
|
128
|
+
result = {}
|
130
129
|
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
130
|
+
# author & publisher
|
131
|
+
author_publisher = ""
|
132
|
+
item.children.each do |node|
|
133
|
+
author_publisher += node.to_s if node.text?
|
134
|
+
author_publisher.strip!
|
135
|
+
break unless author_publisher.empty?
|
136
|
+
end
|
137
|
+
author, publisher = author_publisher.split("/")
|
138
|
+
result[:author] = author.strip if author
|
139
|
+
result[:publisher] = publisher.strip if publisher
|
141
140
|
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
141
|
+
# title & url
|
142
|
+
link = item % "a"
|
143
|
+
result[:title] = link.inner_text.strip
|
144
|
+
link_to_description = link["href"]
|
145
|
+
slash = ""
|
146
|
+
slash = "/" unless link_to_description.start_with?("/")
|
147
|
+
result[:url] = "#{SITE}#{slash}#{link_to_description}"
|
149
148
|
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
end
|
149
|
+
book_search_results << result
|
150
|
+
rescue StandardError => ex
|
151
|
+
trace = ex.backtrace.join("\n> ")
|
152
|
+
log.error { "Failed parsing Siciliano search page #{ex.message}\n#{trace}" }
|
155
153
|
end
|
156
154
|
|
157
155
|
book_search_results
|
@@ -161,59 +159,60 @@ module Alexandria
|
|
161
159
|
# checked against Siciliano website 21 Feb 2009
|
162
160
|
doc = html_to_doc(html)
|
163
161
|
# title
|
164
|
-
title_div = doc %
|
162
|
+
title_div = doc % "div#conteudo//div.titulo"
|
165
163
|
raise NoResultsError unless title_div
|
166
|
-
|
164
|
+
|
165
|
+
title_h = title_div % "h2"
|
167
166
|
title = title_h.inner_text if title_h
|
168
167
|
# title = first_non_empty_text_node(title_div)
|
169
168
|
# author_spans = doc/'span.rotulo'
|
170
|
-
author_hs = title_div /
|
169
|
+
author_hs = title_div / "h3.autor"
|
171
170
|
authors = []
|
172
171
|
author_hs.each do |h|
|
173
172
|
authors << h.inner_text.strip
|
174
173
|
end
|
175
174
|
## synopsis_div = doc % 'div#sinopse'
|
176
|
-
details_div = doc %
|
175
|
+
details_div = doc % "div#tab-caracteristica"
|
177
176
|
details = string_array_to_map(lines_of_text_as_array(details_div))
|
178
177
|
# ISBN
|
179
|
-
isbn = details[
|
178
|
+
isbn = details["ISBN"]
|
180
179
|
## ean = details["CdBarras"]
|
181
|
-
translator = details[
|
180
|
+
translator = details["Tradutor"]
|
182
181
|
authors << translator if translator
|
183
|
-
binding = details[
|
182
|
+
binding = details["Acabamento"]
|
184
183
|
publisher = search_result[:publisher]
|
185
184
|
# publish year
|
186
185
|
publish_year = nil
|
187
|
-
edition = details[
|
188
|
-
|
189
|
-
|
190
|
-
end
|
186
|
+
edition = details["Edio"]
|
187
|
+
# publication date
|
188
|
+
publish_year = Regexp.last_match[1].to_i if edition && edition =~ /([12][0-9]{3})/
|
191
189
|
# cover
|
192
190
|
# ImgSrc[1]="/imagem/imagem.dll?pro_id=1386929&PIM_Id=658849";
|
193
191
|
image_urls = []
|
194
|
-
(doc /
|
192
|
+
(doc / "script").each do |script|
|
195
193
|
next if script.children.nil?
|
194
|
+
|
196
195
|
script.children.each do |ch|
|
197
196
|
ch_text = ch.to_s
|
198
|
-
if ch_text =~ /ImgSrc\[
|
197
|
+
if ch_text =~ /ImgSrc\[\d\]="(.+)";/
|
199
198
|
img_link = Regexp.last_match[1]
|
200
199
|
image_urls << img_link
|
201
200
|
end
|
202
201
|
end
|
203
202
|
end
|
204
203
|
book = Book.new(title, authors, isbn, publisher, publish_year, binding)
|
205
|
-
|
206
|
-
|
207
|
-
rescue => ex
|
204
|
+
[book, image_urls.first]
|
205
|
+
rescue StandardError => ex
|
208
206
|
trace = ex.backtrace.join("\n> ")
|
209
207
|
log.error { "Failed parsing Siciliano product page #{ex.message}\n#{trace}" }
|
210
208
|
nil
|
211
209
|
end
|
212
210
|
|
213
211
|
def first_non_empty_text_node(elem)
|
214
|
-
text =
|
212
|
+
text = ""
|
215
213
|
elem.children.each do |node|
|
216
214
|
next unless node.text?
|
215
|
+
|
217
216
|
text = node.to_s.strip
|
218
217
|
break unless text.empty?
|
219
218
|
end
|
@@ -222,28 +221,28 @@ module Alexandria
|
|
222
221
|
|
223
222
|
def lines_of_text_as_array(elem)
|
224
223
|
lines = []
|
225
|
-
current_text =
|
224
|
+
current_text = ""
|
226
225
|
elem.children.each do |e|
|
227
226
|
if e.text?
|
228
227
|
current_text += e.to_s
|
229
|
-
elsif e.name ==
|
228
|
+
elsif e.name == "br"
|
230
229
|
lines << current_text.strip
|
231
|
-
current_text =
|
230
|
+
current_text = ""
|
232
231
|
else
|
233
232
|
current_text += e.inner_text
|
234
233
|
end
|
235
234
|
end
|
236
235
|
lines << current_text.strip
|
237
|
-
lines.delete(
|
236
|
+
lines.delete("")
|
238
237
|
lines
|
239
238
|
end
|
240
239
|
|
241
240
|
def string_array_to_map(arr)
|
242
241
|
map = {}
|
243
242
|
arr.each do |str|
|
244
|
-
key, val = str.split(
|
243
|
+
key, val = str.split(":")
|
245
244
|
# a real hack for not handling encoding properly :^)
|
246
|
-
map[key.gsub(/[^a-zA-Z]/,
|
245
|
+
map[key.gsub(/[^a-zA-Z]/, "")] = val.strip if val
|
247
246
|
end
|
248
247
|
map
|
249
248
|
end
|