alexandria-book-collection-manager 0.7.1 → 0.7.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.github/dependabot.yml +9 -0
- data/.gitignore +5 -2
- data/.hound.yml +2 -0
- data/.rubocop.yml +113 -45
- data/.rubocop_todo.yml +82 -170
- data/.simplecov +5 -1
- data/.travis.yml +45 -0
- data/.yardopts +1 -1
- data/CHANGELOG.md +60 -0
- data/ChangeLog.0 +33 -35
- data/Gemfile +6 -5
- data/INSTALL.md +164 -0
- data/README.md +52 -42
- data/Rakefile +95 -109
- data/TODO.md +9 -1
- data/alexandria-book-collection-manager.gemspec +52 -45
- data/bin/alexandria +31 -53
- data/doc/AUTHORS +61 -0
- data/doc/BUGS +31 -0
- data/doc/FAQ +365 -0
- data/doc/HACKING +19 -0
- data/doc/NEWS +341 -0
- data/doc/alexandria.1 +120 -0
- data/doc/cuecat_support.rdoc +67 -0
- data/doc/dependency_decisions.yml +80 -0
- data/lib/alexandria.rb +29 -37
- data/lib/alexandria/about.rb +52 -51
- data/lib/alexandria/book_providers.rb +94 -101
- data/lib/alexandria/book_providers/adlibris.rb +45 -85
- data/lib/alexandria/book_providers/amazon_aws.rb +105 -113
- data/lib/alexandria/book_providers/amazon_ecs_util.rb +293 -324
- data/lib/alexandria/book_providers/barnes_and_noble.rb +54 -53
- data/lib/alexandria/book_providers/douban.rb +29 -51
- data/lib/alexandria/book_providers/proxis.rb +42 -59
- data/lib/alexandria/book_providers/pseudomarc.rb +79 -99
- data/lib/alexandria/book_providers/siciliano.rb +68 -70
- data/lib/alexandria/book_providers/thalia.rb +46 -45
- data/lib/alexandria/book_providers/web.rb +17 -33
- data/lib/alexandria/book_providers/worldcat.rb +74 -102
- data/lib/alexandria/book_providers/z3950.rb +170 -174
- data/lib/alexandria/config.rb +5 -3
- data/lib/alexandria/console.rb +10 -21
- data/lib/alexandria/default_preferences.rb +37 -0
- data/lib/alexandria/execution_queue.rb +17 -15
- data/lib/alexandria/export_format.rb +47 -0
- data/lib/alexandria/export_library.rb +188 -302
- data/lib/alexandria/import_library.rb +114 -155
- data/lib/alexandria/import_library_csv.rb +46 -96
- data/lib/alexandria/library_collection.rb +79 -0
- data/lib/alexandria/library_sort_order.rb +45 -0
- data/lib/alexandria/library_store.rb +233 -0
- data/lib/alexandria/logging.rb +15 -19
- data/lib/alexandria/models/book.rb +15 -20
- data/lib/alexandria/models/library.rb +81 -363
- data/lib/alexandria/net.rb +7 -6
- data/lib/alexandria/preferences.rb +73 -91
- data/lib/alexandria/scanners.rb +4 -2
- data/lib/alexandria/scanners/{cuecat.rb → cue_cat.rb} +24 -20
- data/lib/alexandria/scanners/keyboard.rb +10 -8
- data/lib/alexandria/smart_library.rb +135 -171
- data/lib/alexandria/ui.rb +17 -15
- data/lib/alexandria/ui/about_dialog.rb +49 -0
- data/lib/alexandria/ui/{dialogs/acquire_dialog.rb → acquire_dialog.rb} +129 -152
- data/lib/alexandria/ui/alert_dialog.rb +64 -0
- data/lib/alexandria/ui/bad_isbns_dialog.rb +41 -0
- data/lib/alexandria/ui/{dialogs/barcode_animation.rb → barcode_animation.rb} +18 -15
- data/lib/alexandria/ui/{dialogs/book_properties_dialog.rb → book_properties_dialog.rb} +44 -61
- data/lib/alexandria/ui/{dialogs/book_properties_dialog_base.rb → book_properties_dialog_base.rb} +84 -89
- data/lib/alexandria/ui/builder_base.rb +9 -27
- data/lib/alexandria/ui/callbacks.rb +188 -186
- data/lib/alexandria/ui/columns.rb +2 -0
- data/lib/alexandria/ui/completion_models.rb +12 -23
- data/lib/alexandria/ui/confirm_erase_dialog.rb +33 -0
- data/lib/alexandria/ui/conflict_while_copying_dialog.rb +34 -0
- data/lib/alexandria/ui/dndable.rb +10 -8
- data/lib/alexandria/ui/error_dialog.rb +25 -0
- data/lib/alexandria/ui/export_dialog.rb +139 -0
- data/lib/alexandria/ui/icons.rb +49 -65
- data/lib/alexandria/ui/iconview.rb +15 -13
- data/lib/alexandria/ui/iconview_tooltips.rb +43 -58
- data/lib/alexandria/ui/import_dialog.rb +157 -0
- data/lib/alexandria/ui/init.rb +23 -33
- data/lib/alexandria/ui/keep_bad_isbn_dialog.rb +36 -0
- data/lib/alexandria/ui/libraries_combo.rb +18 -14
- data/lib/alexandria/ui/listview.rb +77 -88
- data/lib/alexandria/ui/main_app.rb +26 -26
- data/lib/alexandria/ui/misc_dialogs.rb +10 -0
- data/lib/alexandria/ui/multi_drag_treeview.rb +30 -41
- data/lib/alexandria/ui/{dialogs/new_book_dialog.rb → new_book_dialog.rb} +168 -215
- data/lib/alexandria/ui/new_book_dialog_manual.rb +139 -0
- data/lib/alexandria/ui/new_provider_dialog.rb +100 -0
- data/lib/alexandria/ui/new_smart_library_dialog.rb +74 -0
- data/lib/alexandria/ui/preferences_dialog.rb +313 -0
- data/lib/alexandria/ui/provider_preferences_base_dialog.rb +95 -0
- data/lib/alexandria/ui/provider_preferences_dialog.rb +35 -0
- data/lib/alexandria/ui/really_delete_dialog.rb +53 -0
- data/lib/alexandria/ui/{sidepane.rb → sidepane_manager.rb} +62 -72
- data/lib/alexandria/ui/skip_entry_dialog.rb +33 -0
- data/lib/alexandria/ui/smart_library_properties_dialog.rb +60 -0
- data/lib/alexandria/ui/{dialogs/smart_library_properties_dialog_base.rb → smart_library_properties_dialog_base.rb} +96 -172
- data/lib/alexandria/ui/smart_library_rule_box.rb +119 -0
- data/lib/alexandria/ui/sound.rb +13 -13
- data/lib/alexandria/ui/ui_manager.rb +262 -283
- data/lib/alexandria/undo_manager.rb +3 -0
- data/lib/alexandria/version.rb +6 -19
- data/lib/alexandria/web_themes.rb +24 -21
- data/po/Makefile +2 -2
- data/po/cs.po +993 -880
- data/po/cy.po +957 -874
- data/po/de.po +990 -869
- data/po/el.po +989 -869
- data/po/es.po +985 -865
- data/po/fr.po +986 -870
- data/po/ga.po +907 -823
- data/po/gl.po +981 -865
- data/po/it.po +986 -868
- data/po/ja.po +969 -853
- data/po/mk.po +983 -863
- data/po/nb.po +979 -863
- data/po/nl.po +983 -864
- data/po/pl.po +1020 -969
- data/po/pt.po +988 -861
- data/po/pt_BR.po +984 -868
- data/po/ru.po +992 -873
- data/po/sk.po +987 -869
- data/po/sv.po +977 -861
- data/po/uk.po +975 -865
- data/po/zh_TW.po +976 -860
- data/schemas/alexandria.schemas +25 -3
- data/share/alexandria/glade/acquire_dialog__builder.glade +15 -12
- data/share/alexandria/glade/book_properties_dialog__builder.glade +171 -299
- data/share/alexandria/glade/main_app__builder.glade +24 -33
- data/share/alexandria/glade/new_book_dialog__builder.glade +27 -59
- data/share/alexandria/glade/preferences_dialog__builder.glade +250 -290
- data/share/gnome/help/alexandria/C/introduction.xml +0 -8
- data/share/gnome/help/alexandria/C/searching.xml +1 -1
- data/share/gnome/help/alexandria/C/smart-libraries.xml +2 -2
- data/share/gnome/help/alexandria/C/working-with-libraries.xml +1 -1
- data/share/gnome/help/alexandria/fr/alexandria.xml +1 -1
- data/share/gnome/help/alexandria/ja/introduction.xml +0 -8
- data/share/gnome/help/alexandria/ja/smart-libraries.xml +1 -1
- data/spec/alexandria/book_providers/world_cat_provider_spec.rb +160 -0
- data/spec/alexandria/book_providers_spec.rb +77 -210
- data/spec/alexandria/book_spec.rb +16 -12
- data/spec/alexandria/console_spec.rb +27 -0
- data/spec/alexandria/export_library_spec.rb +130 -0
- data/spec/alexandria/library_spec.rb +130 -172
- data/spec/alexandria/library_store_spec.rb +37 -0
- data/spec/alexandria/preferences_spec.rb +46 -17
- data/spec/alexandria/scanners/cue_cat_spec.rb +52 -0
- data/spec/alexandria/smart_library_spec.rb +32 -25
- data/spec/alexandria/ui/about_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/acquire_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/alert_dialog_spec.rb +16 -0
- data/spec/alexandria/ui/bad_isbns_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/book_properties_dialog_spec.rb +17 -0
- data/spec/alexandria/ui/confirm_erase_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/conflict_while_copying_dialog_spec.rb +16 -0
- data/spec/alexandria/ui/error_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/export_dialog_spec.rb +15 -0
- data/spec/alexandria/ui/icons_spec.rb +26 -0
- data/spec/alexandria/ui/iconview_spec.rb +9 -21
- data/spec/alexandria/ui/import_dialog_spec.rb +41 -0
- data/spec/alexandria/ui/keep_bad_isbn_dialog_spec.rb +17 -0
- data/spec/alexandria/ui/main_app_spec.rb +8 -33
- data/spec/alexandria/ui/new_book_dialog_manual_spec.rb +15 -0
- data/spec/alexandria/ui/new_book_dialog_spec.rb +22 -0
- data/spec/alexandria/ui/new_provider_dialog_spec.rb +30 -0
- data/spec/alexandria/ui/new_smart_library_dialog_spec.rb +39 -0
- data/spec/alexandria/ui/preferences_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/provider_preferences_dialog_spec.rb +34 -0
- data/spec/alexandria/ui/really_delete_dialog_spec.rb +16 -0
- data/spec/alexandria/ui/sidepane_manager_spec.rb +15 -0
- data/spec/alexandria/ui/skip_entry_dialog_spec.rb +14 -0
- data/spec/alexandria/ui/smart_library_properties_dialog_spec.rb +32 -0
- data/spec/alexandria/ui/sound_spec.rb +4 -2
- data/spec/alexandria/ui/ui_manager_spec.rb +45 -20
- data/spec/end_to_end/basic_run_spec.rb +57 -0
- data/spec/spec_helper.rb +66 -33
- data/tasks/setup.rb +5 -3
- data/tasks/spec.rake +18 -3
- data/util/rake/fileinstall.rb +38 -40
- data/util/rake/gettextgenerate.rb +15 -70
- data/util/rake/omfgenerate.rb +10 -10
- metadata +176 -60
- data/INSTALL.rdoc +0 -148
- data/dogtail/basic_run_test.py +0 -9
- data/lib/alexandria/book_providers/bol_it.rb +0 -160
- data/lib/alexandria/book_providers/deastore.rb +0 -273
- data/lib/alexandria/book_providers/ibs_it.rb +0 -147
- data/lib/alexandria/book_providers/mcu.rb +0 -169
- data/lib/alexandria/book_providers/renaud.rb +0 -140
- data/lib/alexandria/book_providers/webster_it.rb +0 -167
- data/lib/alexandria/ui/dialogs/about_dialog.rb +0 -59
- data/lib/alexandria/ui/dialogs/alert_dialog.rb +0 -70
- data/lib/alexandria/ui/dialogs/bad_isbns_dialog.rb +0 -43
- data/lib/alexandria/ui/dialogs/export_dialog.rb +0 -171
- data/lib/alexandria/ui/dialogs/import_dialog.rb +0 -196
- data/lib/alexandria/ui/dialogs/misc_dialogs.rb +0 -85
- data/lib/alexandria/ui/dialogs/new_book_dialog_manual.rb +0 -154
- data/lib/alexandria/ui/dialogs/new_smart_library_dialog.rb +0 -74
- data/lib/alexandria/ui/dialogs/preferences_dialog.rb +0 -578
- data/lib/alexandria/ui/dialogs/smart_library_properties_dialog.rb +0 -57
- data/spec/alexandria/scanners/cuecat_spec.rb +0 -65
- data/spec/alexandria/ui/dialogs_spec.rb +0 -94
- data/spec/alexandria/ui/sidepane_spec.rb +0 -27
- data/spec/alexandria/ui/ui_utilities_spec.rb +0 -60
- data/spec/alexandria/utilities_spec.rb +0 -50
- data/tasks/dogtail.rake +0 -4
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Copyright (C) 2009 Cathal Mc Ginley
|
2
4
|
# Copyright (C) 2010 Martin Sucha
|
3
5
|
#
|
@@ -20,117 +22,104 @@ module Alexandria
|
|
20
22
|
# A really simple regex-based parser to grab data out of marc text records.
|
21
23
|
class PseudoMarcParser
|
22
24
|
BNF_FR_MAPPINGS = {
|
23
|
-
title: [
|
24
|
-
authors: [
|
25
|
-
isbn: [
|
26
|
-
publisher: [
|
27
|
-
year: [
|
28
|
-
binding: [
|
29
|
-
notes: [
|
25
|
+
title: ["200", "a"],
|
26
|
+
authors: ["700", "a"],
|
27
|
+
isbn: ["010", "a"],
|
28
|
+
publisher: ["210", "g"],
|
29
|
+
year: ["210", "d"],
|
30
|
+
binding: ["225", "a"],
|
31
|
+
notes: ["520", "a"]
|
30
32
|
}.freeze
|
31
33
|
|
32
34
|
USMARC_MAPPINGS = {
|
33
|
-
title: [
|
34
|
-
authors: [
|
35
|
-
isbn: [
|
36
|
-
publisher: [
|
37
|
-
year: [
|
38
|
-
binding: [
|
39
|
-
notes: [
|
35
|
+
title: ["245", "a", "b"],
|
36
|
+
authors: ["100", "a"],
|
37
|
+
isbn: ["020", "a"],
|
38
|
+
publisher: ["490", "a"],
|
39
|
+
year: ["260", "c"],
|
40
|
+
binding: ["020", "a"], # listed with isbn here
|
41
|
+
notes: ["520", "a"]
|
40
42
|
}.freeze
|
41
43
|
|
42
|
-
def self.get_fields(data, type, stripping,
|
43
|
-
field =
|
44
|
-
|
44
|
+
def self.get_fields(data, type, stripping, mappings = USMARC_MAPPINGS)
|
45
|
+
field = ""
|
46
|
+
mappings[type][1..mappings[type].length - 1].each do |part|
|
45
47
|
if data.first[part]
|
46
48
|
part_data = data.first[part].strip
|
47
49
|
if part_data =~ stripping
|
48
50
|
part_data = Regexp.last_match[1]
|
49
51
|
part_data = part_data.strip
|
50
52
|
end
|
51
|
-
field +=
|
53
|
+
field += ": " if field != ""
|
52
54
|
field += part_data
|
53
55
|
end
|
54
56
|
end
|
55
|
-
field = nil if field ==
|
57
|
+
field = nil if field == ""
|
56
58
|
field
|
57
59
|
end
|
58
60
|
|
59
|
-
def self.marc_text_to_book(marc,
|
61
|
+
def self.marc_text_to_book(marc, mappings = USMARC_MAPPINGS)
|
60
62
|
details = marc_text_to_details(marc)
|
61
|
-
|
62
|
-
title = nil
|
63
|
-
title_data = details[m[:title][0]]
|
64
|
-
if title_data
|
65
|
-
title_data_all = get_fields(title_data, :title, /(.*)[\/:]$/, m)
|
66
|
-
title = title_data_all if title_data_all
|
67
|
-
end
|
63
|
+
return if details.empty?
|
68
64
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
author = author.strip
|
76
|
-
if author =~ /(.*),$/
|
77
|
-
author = Regexp.last_match[1]
|
78
|
-
end
|
79
|
-
authors << author
|
80
|
-
end
|
81
|
-
end
|
82
|
-
end
|
65
|
+
title = nil
|
66
|
+
title_data = details[mappings[:title][0]]
|
67
|
+
if title_data
|
68
|
+
title_data_all = get_fields(title_data, :title, %r{(.*)[/:]$}, mappings)
|
69
|
+
title = title_data_all if title_data_all
|
70
|
+
end
|
83
71
|
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
72
|
+
authors = []
|
73
|
+
author_data = details[mappings[:authors][0]]
|
74
|
+
author_data&.each do |ad|
|
75
|
+
author = ad[mappings[:authors][1]]
|
76
|
+
if author
|
77
|
+
author = author.strip
|
78
|
+
author = Regexp.last_match[1] if author =~ /(.*),$/
|
79
|
+
authors << author
|
91
80
|
end
|
81
|
+
end
|
92
82
|
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
83
|
+
isbn = nil
|
84
|
+
binding = nil
|
85
|
+
isbn_data = details[mappings[:isbn][0]]
|
86
|
+
if isbn_data && isbn_data.first[mappings[:isbn][1]] =~ /([-0-9xX]+)/
|
87
|
+
isbn = Regexp.last_match[1]
|
88
|
+
end
|
99
89
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
90
|
+
binding_data = details[mappings[:binding][0]]
|
91
|
+
if binding_data &&
|
92
|
+
binding_data.first[mappings[:binding][1]] =~ /([a-zA-Z][a-z\s]+[a-z])/
|
93
|
+
binding = Regexp.last_match[1]
|
94
|
+
end
|
105
95
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
year = publication_data.first[m[:year][1]]
|
110
|
-
if year =~ /(\d+)/
|
111
|
-
year = Regexp.last_match[1].to_i
|
112
|
-
end
|
113
|
-
end
|
96
|
+
publisher = nil
|
97
|
+
publisher_data = details[mappings[:publisher][0]]
|
98
|
+
publisher = publisher_data.first[mappings[:publisher][1]] if publisher_data
|
114
99
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
end
|
122
|
-
end
|
100
|
+
year = nil
|
101
|
+
publication_data = details[mappings[:year][0]]
|
102
|
+
if publication_data
|
103
|
+
year = publication_data.first[mappings[:year][1]]
|
104
|
+
year = Regexp.last_match[1].to_i if year =~ /(\d+)/
|
105
|
+
end
|
123
106
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
107
|
+
notes = ""
|
108
|
+
notes_data = details[mappings[:notes][0]]
|
109
|
+
notes_data&.each do |note|
|
110
|
+
txt = note[mappings[:notes][1]]
|
111
|
+
notes += txt if txt
|
112
|
+
end
|
128
113
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
book
|
114
|
+
if title.nil? && isbn.nil?
|
115
|
+
# probably didn't undertand the MARC dialect
|
116
|
+
return nil
|
133
117
|
end
|
118
|
+
|
119
|
+
book = Alexandria::Book.new(title, authors, isbn,
|
120
|
+
publisher, year, binding)
|
121
|
+
book.notes = notes unless notes.empty?
|
122
|
+
book
|
134
123
|
end
|
135
124
|
|
136
125
|
def self.marc_text_to_details(marc)
|
@@ -142,31 +131,22 @@ module Alexandria
|
|
142
131
|
|
143
132
|
this_line_data = {}
|
144
133
|
|
145
|
-
# puts code
|
146
|
-
# puts data
|
147
134
|
d_idx = 0
|
148
135
|
while d_idx < data.size
|
149
136
|
d_str = data[d_idx..-1]
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
# puts " " + $2
|
158
|
-
# puts idx
|
159
|
-
d_idx += idx + 2 # (2 extra to push beyond this '$a' etc.)
|
160
|
-
else
|
161
|
-
break
|
162
|
-
end
|
137
|
+
idx = d_str =~ /\$([a-z]) ([^$]+)/
|
138
|
+
break unless idx
|
139
|
+
|
140
|
+
sub_code = Regexp.last_match[1]
|
141
|
+
sub_data = Regexp.last_match[2]
|
142
|
+
this_line_data[sub_code] = sub_data
|
143
|
+
d_idx += idx + 2 # (2 extra to push beyond this '$a' etc.)
|
163
144
|
end
|
164
145
|
|
165
146
|
unless this_line_data.empty?
|
166
147
|
details[code] = [] unless details.key?(code)
|
167
148
|
details[code] << this_line_data
|
168
149
|
end
|
169
|
-
|
170
150
|
end
|
171
151
|
end
|
172
152
|
details
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Copyright (C) 2004 Laurent Sansonetti
|
2
4
|
# Copyright (C) 2007 Laurent Sansonetti and Marco Costantini
|
3
5
|
# Copyright (C) 2009 Cathal Mc Ginley
|
@@ -23,27 +25,27 @@
|
|
23
25
|
# Almost completely rewritten by Cathal Mc Ginley (21 Feb 2009)
|
24
26
|
# based on the new code for Palatina
|
25
27
|
|
26
|
-
require
|
27
|
-
require
|
28
|
-
require
|
28
|
+
require "net/http"
|
29
|
+
require "cgi"
|
30
|
+
require "alexandria/book_providers/web"
|
29
31
|
|
30
32
|
module Alexandria
|
31
33
|
class BookProviders
|
32
34
|
class SicilianoProvider < WebsiteBasedProvider
|
33
35
|
include Logging
|
34
36
|
|
35
|
-
SITE =
|
37
|
+
SITE = "http://www.siciliano.com.br"
|
36
38
|
|
37
39
|
# The string interpolations in this URL are the search term and search
|
38
40
|
# type, respectively.
|
39
41
|
BASE_SEARCH_URL = "#{SITE}/pesquisaweb/pesquisaweb.dll/pesquisa?" \
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
"&FIL_ID=102" \
|
43
|
+
"&PALAVRASN1=%s" \
|
44
|
+
"&FILTRON1=%s" \
|
45
|
+
"&ESTRUTN1=0301&ORDEMN2=E"
|
44
46
|
|
45
47
|
def initialize
|
46
|
-
super(
|
48
|
+
super("Siciliano", "Livraria Siciliano (Brasil)")
|
47
49
|
# no preferences for the moment
|
48
50
|
prefs.read
|
49
51
|
end
|
@@ -55,7 +57,7 @@ module Alexandria
|
|
55
57
|
end
|
56
58
|
|
57
59
|
def search(criterion, type)
|
58
|
-
criterion = criterion.encode(
|
60
|
+
criterion = criterion.encode("ISO-8859-1") # still needed??
|
59
61
|
trying_again = false
|
60
62
|
begin
|
61
63
|
req = create_search_uri(type, criterion, trying_again)
|
@@ -69,14 +71,13 @@ module Alexandria
|
|
69
71
|
else
|
70
72
|
results.map { |result| get_book_from_search_result(result) }
|
71
73
|
end
|
72
|
-
|
73
|
-
rescue NoResultsError => err
|
74
|
+
rescue NoResultsError => ex
|
74
75
|
if (type == SEARCH_BY_ISBN) && (trying_again == false)
|
75
76
|
trying_again = true
|
76
77
|
retry
|
77
|
-
else
|
78
|
-
raise err
|
79
78
|
end
|
79
|
+
|
80
|
+
raise ex
|
80
81
|
end
|
81
82
|
end
|
82
83
|
|
@@ -89,10 +90,10 @@ module Alexandria
|
|
89
90
|
private
|
90
91
|
|
91
92
|
def create_search_uri(search_type, search_term, trying_again = false)
|
92
|
-
(search_type_code = { SEARCH_BY_ISBN
|
93
|
-
SEARCH_BY_TITLE
|
94
|
-
SEARCH_BY_AUTHORS =>
|
95
|
-
SEARCH_BY_KEYWORD =>
|
93
|
+
(search_type_code = { SEARCH_BY_ISBN => "G",
|
94
|
+
SEARCH_BY_TITLE => "A",
|
95
|
+
SEARCH_BY_AUTHORS => "B",
|
96
|
+
SEARCH_BY_KEYWORD => "X" }[search_type]) || "X"
|
96
97
|
search_term_encoded = if search_type == SEARCH_BY_ISBN
|
97
98
|
if trying_again
|
98
99
|
# on second attempt, try ISBN-10...
|
@@ -105,7 +106,7 @@ module Alexandria
|
|
105
106
|
CGI.escape(search_term)
|
106
107
|
end
|
107
108
|
|
108
|
-
BASE_SEARCH_URL
|
109
|
+
format(BASE_SEARCH_URL, search_term_encoded, search_type_code)
|
109
110
|
end
|
110
111
|
|
111
112
|
def parse_search_result_data(html)
|
@@ -122,35 +123,33 @@ module Alexandria
|
|
122
123
|
book_search_results = []
|
123
124
|
# each result will be a dict with keys :title, :author, :publisher, :url
|
124
125
|
|
125
|
-
list_items = doc.search(
|
126
|
+
list_items = doc.search("div.pesquisa-item-lista-conteudo")
|
126
127
|
list_items.each do |item|
|
127
|
-
|
128
|
-
result = {}
|
128
|
+
result = {}
|
129
129
|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
130
|
+
# author & publisher
|
131
|
+
author_publisher = ""
|
132
|
+
item.children.each do |node|
|
133
|
+
author_publisher += node.to_s if node.text?
|
134
|
+
author_publisher.strip!
|
135
|
+
break unless author_publisher.empty?
|
136
|
+
end
|
137
|
+
author, publisher = author_publisher.split("/")
|
138
|
+
result[:author] = author.strip if author
|
139
|
+
result[:publisher] = publisher.strip if publisher
|
140
140
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
141
|
+
# title & url
|
142
|
+
link = item % "a"
|
143
|
+
result[:title] = link.inner_text.strip
|
144
|
+
link_to_description = link["href"]
|
145
|
+
slash = ""
|
146
|
+
slash = "/" unless link_to_description.start_with?("/")
|
147
|
+
result[:url] = "#{SITE}#{slash}#{link_to_description}"
|
148
148
|
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
end
|
149
|
+
book_search_results << result
|
150
|
+
rescue StandardError => ex
|
151
|
+
trace = ex.backtrace.join("\n> ")
|
152
|
+
log.error { "Failed parsing Siciliano search page #{ex.message}\n#{trace}" }
|
154
153
|
end
|
155
154
|
|
156
155
|
book_search_results
|
@@ -160,61 +159,60 @@ module Alexandria
|
|
160
159
|
# checked against Siciliano website 21 Feb 2009
|
161
160
|
doc = html_to_doc(html)
|
162
161
|
# title
|
163
|
-
title_div = doc %
|
162
|
+
title_div = doc % "div#conteudo//div.titulo"
|
164
163
|
raise NoResultsError unless title_div
|
165
|
-
|
164
|
+
|
165
|
+
title_h = title_div % "h2"
|
166
166
|
title = title_h.inner_text if title_h
|
167
167
|
# title = first_non_empty_text_node(title_div)
|
168
168
|
# author_spans = doc/'span.rotulo'
|
169
|
-
author_hs = title_div /
|
169
|
+
author_hs = title_div / "h3.autor"
|
170
170
|
authors = []
|
171
171
|
author_hs.each do |h|
|
172
172
|
authors << h.inner_text.strip
|
173
173
|
end
|
174
174
|
## synopsis_div = doc % 'div#sinopse'
|
175
|
-
details_div = doc %
|
175
|
+
details_div = doc % "div#tab-caracteristica"
|
176
176
|
details = string_array_to_map(lines_of_text_as_array(details_div))
|
177
177
|
# ISBN
|
178
|
-
isbn = details[
|
178
|
+
isbn = details["ISBN"]
|
179
179
|
## ean = details["CdBarras"]
|
180
|
-
translator = details[
|
180
|
+
translator = details["Tradutor"]
|
181
181
|
authors << translator if translator
|
182
|
-
binding = details[
|
182
|
+
binding = details["Acabamento"]
|
183
183
|
publisher = search_result[:publisher]
|
184
184
|
# publish year
|
185
185
|
publish_year = nil
|
186
|
-
edition = details[
|
187
|
-
|
188
|
-
|
189
|
-
publish_year = Regexp.last_match[1].to_i
|
190
|
-
end
|
191
|
-
end
|
186
|
+
edition = details["Edio"]
|
187
|
+
# publication date
|
188
|
+
publish_year = Regexp.last_match[1].to_i if edition && edition =~ /([12][0-9]{3})/
|
192
189
|
# cover
|
193
190
|
# ImgSrc[1]="/imagem/imagem.dll?pro_id=1386929&PIM_Id=658849";
|
194
191
|
image_urls = []
|
195
|
-
(doc /
|
192
|
+
(doc / "script").each do |script|
|
196
193
|
next if script.children.nil?
|
194
|
+
|
197
195
|
script.children.each do |ch|
|
198
196
|
ch_text = ch.to_s
|
199
|
-
if ch_text =~ /ImgSrc\[
|
197
|
+
if ch_text =~ /ImgSrc\[\d\]="(.+)";/
|
200
198
|
img_link = Regexp.last_match[1]
|
201
199
|
image_urls << img_link
|
202
200
|
end
|
203
201
|
end
|
204
202
|
end
|
205
203
|
book = Book.new(title, authors, isbn, publisher, publish_year, binding)
|
206
|
-
|
207
|
-
|
208
|
-
rescue => ex
|
204
|
+
[book, image_urls.first]
|
205
|
+
rescue StandardError => ex
|
209
206
|
trace = ex.backtrace.join("\n> ")
|
210
207
|
log.error { "Failed parsing Siciliano product page #{ex.message}\n#{trace}" }
|
211
|
-
|
208
|
+
nil
|
212
209
|
end
|
213
210
|
|
214
211
|
def first_non_empty_text_node(elem)
|
215
|
-
text =
|
212
|
+
text = ""
|
216
213
|
elem.children.each do |node|
|
217
214
|
next unless node.text?
|
215
|
+
|
218
216
|
text = node.to_s.strip
|
219
217
|
break unless text.empty?
|
220
218
|
end
|
@@ -223,28 +221,28 @@ module Alexandria
|
|
223
221
|
|
224
222
|
def lines_of_text_as_array(elem)
|
225
223
|
lines = []
|
226
|
-
current_text =
|
224
|
+
current_text = ""
|
227
225
|
elem.children.each do |e|
|
228
226
|
if e.text?
|
229
227
|
current_text += e.to_s
|
230
|
-
elsif e.name ==
|
228
|
+
elsif e.name == "br"
|
231
229
|
lines << current_text.strip
|
232
|
-
current_text =
|
230
|
+
current_text = ""
|
233
231
|
else
|
234
232
|
current_text += e.inner_text
|
235
233
|
end
|
236
234
|
end
|
237
235
|
lines << current_text.strip
|
238
|
-
lines.delete(
|
236
|
+
lines.delete("")
|
239
237
|
lines
|
240
238
|
end
|
241
239
|
|
242
240
|
def string_array_to_map(arr)
|
243
241
|
map = {}
|
244
242
|
arr.each do |str|
|
245
|
-
key, val = str.split(
|
243
|
+
key, val = str.split(":")
|
246
244
|
# a real hack for not handling encoding properly :^)
|
247
|
-
map[key.gsub(/[^a-zA-Z]/,
|
245
|
+
map[key.gsub(/[^a-zA-Z]/, "")] = val.strip if val
|
248
246
|
end
|
249
247
|
map
|
250
248
|
end
|