alexandria-book-collection-manager 0.7.8 → 0.7.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (96) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ruby.yml +45 -50
  3. data/.rubocop.yml +18 -5
  4. data/.rubocop_todo.yml +29 -22
  5. data/CHANGELOG.md +29 -0
  6. data/ChangeLog.0 +19 -19
  7. data/INSTALL.md +3 -5
  8. data/README.md +0 -5
  9. data/Rakefile +11 -11
  10. data/alexandria-book-collection-manager.gemspec +35 -34
  11. data/doc/FAQ +2 -2
  12. data/lib/alexandria/about.rb +1 -1
  13. data/lib/alexandria/book_providers/bl_provider.rb +88 -0
  14. data/lib/alexandria/book_providers/loc_provider.rb +38 -0
  15. data/lib/alexandria/book_providers/pseudomarc.rb +1 -1
  16. data/lib/alexandria/book_providers/sbn_provider.rb +108 -0
  17. data/lib/alexandria/book_providers/thalia_provider.rb +1 -1
  18. data/lib/alexandria/book_providers/web.rb +2 -2
  19. data/lib/alexandria/book_providers/worldcat.rb +9 -7
  20. data/lib/alexandria/book_providers/z3950_provider.rb +199 -0
  21. data/lib/alexandria/book_providers.rb +10 -25
  22. data/lib/alexandria/default_preferences.rb +1 -1
  23. data/lib/alexandria/export_library.rb +10 -10
  24. data/lib/alexandria/image_fetcher.rb +25 -0
  25. data/lib/alexandria/import_library.rb +9 -9
  26. data/lib/alexandria/library_store.rb +3 -4
  27. data/lib/alexandria/models/book.rb +13 -0
  28. data/lib/alexandria/models/library.rb +13 -21
  29. data/lib/alexandria/preferences.rb +4 -6
  30. data/lib/alexandria/scanners/cue_cat.rb +1 -1
  31. data/lib/alexandria/ui/about_dialog.rb +1 -1
  32. data/lib/alexandria/ui/acquire_dialog.rb +6 -9
  33. data/lib/alexandria/ui/barcode_animation.rb +1 -1
  34. data/lib/alexandria/ui/book_properties_dialog_base.rb +2 -6
  35. data/lib/alexandria/ui/completion_models.rb +1 -5
  36. data/lib/alexandria/ui/conflict_while_copying_dialog.rb +1 -1
  37. data/lib/alexandria/ui/listview.rb +1 -1
  38. data/lib/alexandria/ui/multi_drag_treeview.rb +1 -1
  39. data/lib/alexandria/ui/new_book_dialog.rb +11 -13
  40. data/lib/alexandria/ui/new_book_dialog_manual.rb +1 -1
  41. data/lib/alexandria/ui/preferences_dialog.rb +2 -2
  42. data/lib/alexandria/ui/provider_preferences_base_dialog.rb +1 -1
  43. data/lib/alexandria/ui/really_delete_dialog.rb +1 -1
  44. data/lib/alexandria/ui/ui_manager.rb +14 -22
  45. data/lib/alexandria/version.rb +1 -1
  46. data/po/cs.po +90 -125
  47. data/po/cy.po +87 -125
  48. data/po/de.po +96 -125
  49. data/po/el.po +96 -125
  50. data/po/es.po +96 -125
  51. data/po/fr.po +90 -125
  52. data/po/ga.po +83 -124
  53. data/po/gl.po +90 -125
  54. data/po/it.po +90 -125
  55. data/po/ja.po +90 -125
  56. data/po/mk.po +96 -125
  57. data/po/nb.po +90 -125
  58. data/po/nl.po +107 -124
  59. data/po/pl.po +113 -124
  60. data/po/pt.po +90 -125
  61. data/po/pt_BR.po +90 -125
  62. data/po/ru.po +92 -124
  63. data/po/sk.po +90 -125
  64. data/po/sv.po +90 -125
  65. data/po/uk.po +90 -125
  66. data/po/zh_TW.po +90 -125
  67. data/schemas/alexandria.schemas +1 -1
  68. data/share/gnome/help/alexandria/C/adding-books.xml +3 -4
  69. data/share/gnome/help/alexandria/C/introduction.xml +0 -16
  70. data/share/gnome/help/alexandria/C/searching.xml +1 -4
  71. data/share/gnome/help/alexandria/C/settings.xml +0 -30
  72. data/share/gnome/help/alexandria/fr/alexandria.xml +4 -159
  73. data/share/gnome/help/alexandria/ja/adding-books.xml +1 -1
  74. data/share/gnome/help/alexandria/ja/introduction.xml +0 -15
  75. data/share/gnome/help/alexandria/ja/searching.xml +3 -7
  76. data/share/gnome/help/alexandria/ja/settings.xml +0 -27
  77. data/spec/alexandria/book_providers/bl_provider_spec.rb +13 -0
  78. data/spec/alexandria/book_providers/loc_provider_spec.rb +17 -0
  79. data/spec/alexandria/book_providers/sbn_provider_spec.rb +13 -0
  80. data/spec/alexandria/book_providers_spec.rb +0 -81
  81. data/spec/alexandria/library_spec.rb +20 -2
  82. data/spec/alexandria/ui/import_dialog_spec.rb +1 -1
  83. data/spec/alexandria/ui/new_smart_library_dialog_spec.rb +1 -1
  84. data/spec/alexandria/ui/preferences_dialog_spec.rb +1 -1
  85. data/spec/alexandria/ui/ui_manager_spec.rb +78 -2
  86. data/spec/data/libraries/0.6.2/My Library/9780571147168.yaml +2 -0
  87. data/util/rake/fileinstall.rb +4 -4
  88. data/util/rake/omfgenerate.rb +1 -1
  89. metadata +69 -55
  90. data/lib/alexandria/book_providers/adlibris.rb +0 -191
  91. data/lib/alexandria/book_providers/amazon_aws.rb +0 -239
  92. data/lib/alexandria/book_providers/amazon_ecs_util.rb +0 -373
  93. data/lib/alexandria/book_providers/barnes_and_noble.rb +0 -209
  94. data/lib/alexandria/book_providers/proxis.rb +0 -176
  95. data/lib/alexandria/book_providers/siciliano.rb +0 -256
  96. data/lib/alexandria/book_providers/z3950.rb +0 -408
@@ -1,209 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # -*- ruby -*-
4
- #
5
- # Copyright (C) 2009 Cathal Mc Ginley
6
- # Copyright (C) 2011, 2014, 2015 Matijs van Zuijlen
7
- #
8
- # Alexandria is free software; you can redistribute it and/or
9
- # modify it under the terms of the GNU General Public License as
10
- # published by the Free Software Foundation; either version 2 of the
11
- # License, or (at your option) any later version.
12
- #
13
- # Alexandria is distributed in the hope that it will be useful,
14
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
- # General Public License for more details.
17
- #
18
- # You should have received a copy of the GNU General Public
19
- # License along with Alexandria; see the file COPYING. If not,
20
- # write to the Free Software Foundation, Inc., 51 Franklin Street,
21
- # Fifth Floor, Boston, MA 02110-1301 USA.
22
-
23
- # http://en.wikipedia.org/wiki/Barnes_&_Noble
24
-
25
- # New BarnesAndNoble provider, taken from the Palatina MetaDataSource
26
- # and modified to fit the structure of Alexandria book providers.
27
- # Completely rewritten by Cathal Mc Ginley (18 Dec 2009)
28
-
29
- # NOTE: this modified version is based on the Alexandria WorldCat provider.
30
-
31
- require "cgi"
32
- require "alexandria/net"
33
- require "alexandria/book_providers/web"
34
-
35
- module Alexandria
36
- class BookProviders
37
- class BarnesAndNobleProvider < WebsiteBasedProvider
38
- include Logging
39
-
40
- SITE = "http://www.barnesandnoble.com"
41
-
42
- BASE_ISBN_SEARCH_URL = "http://www.barnesandnoble.com/s/%s"
43
-
44
- BASE_SEARCH_URL = "http://search.barnesandnoble.com/booksearch" \
45
- "/results.asp?%s=%s" # type, term
46
-
47
# Sets up the Barnes & Noble provider: passes the label "BarnesAndNoble"
# twice to the superclass constructor, starts without a cached web agent,
# and reads the provider preferences.
def initialize
  provider_label = "BarnesAndNoble"
  super(provider_label, provider_label)
  @agent = nil # created on demand by #agent
  prefs.read
end
52
-
53
# Returns the Alexandria::WWWAgent used for HTTP requests, creating it on
# first use and handing back the same instance on later calls.
def agent
  return @agent if @agent

  @agent = Alexandria::WWWAgent.new
end
57
-
58
# Fetches +uri_str+ through #agent, following HTTP redirects.
#
# uri_str - the URL to request, as a String.
# limit   - how many more requests may be made (default 5).
#
# Returns the response when it is a Net::HTTPSuccess. A redirection is
# followed recursively with +limit+ reduced by one (relative Location
# headers are resolved against +uri_str+). Any other response is handed to
# response.error!.
#
# Raises NoResultsError when +limit+ has reached zero.
def fetch_redirectly(uri_str, limit = 5)
  raise NoResultsError, _("HTTP redirect too deep") if limit.zero?

  if limit >= 10
    log.debug { "Fetching :: #{uri_str}" }
  else
    # brief pause before each request made with a limit below 10
    sleep 0.1
    log.debug { "Redirectly :: #{uri_str}" }
  end

  response = agent.get(uri_str)
  log.debug { response.inspect }

  return response if response.is_a?(Net::HTTPSuccess)
  return response.error! unless response.is_a?(Net::HTTPRedirection)

  target = URI.parse response["Location"]
  target = URI.parse(uri_str) + target if target.relative?
  fetch_redirectly(target.to_s, (limit - 1))
end
79
-
80
# Runs a search against Barnes & Noble.
#
# criterion - the search term (an ISBN for SEARCH_BY_ISBN).
# type      - one of the SEARCH_BY_* constants.
#
# For an ISBN search the fetched page is parsed directly as a book page and
# the result of parse_result_data is returned. For every other type the page
# is parsed as a result list and each hit is fetched and parsed in turn.
#
# Raises NoResultsError when a non-ISBN search yields no hits.
def search(criterion, type)
  request_uri = create_search_uri(type, criterion)
  log.debug { "Requesting #{request_uri}" }
  page = fetch_redirectly(request_uri)

  return parse_result_data(page.body, criterion) if type == SEARCH_BY_ISBN

  hits = parse_search_result_data(page.body)
  raise NoResultsError if hits.empty?

  hits.map { |hit| get_book_from_search_result(hit) }
end
94
-
95
# Returns the Barnes & Noble ISBN-search URL for +book+, or nil (after
# logging a warning) when the URL cannot be built, e.g. because
# create_search_uri raises for the book's ISBN.
def url(book)
  begin
    create_search_uri(SEARCH_BY_ISBN, book.isbn)
  rescue StandardError => ex
    log.warn { "Cannot create url for book #{book}; #{ex.message}" }
    nil
  end
end
101
-
102
# Builds the Barnes & Noble URL for a search.
#
# search_type - one of the SEARCH_BY_* constants.
# search_term - the ISBN (for SEARCH_BY_ISBN) or the free-text term.
#
# ISBN searches use BASE_ISBN_SEARCH_URL with the canonicalised EAN. Other
# searches use BASE_SEARCH_URL with a type code and the CGI-escaped term;
# a search type without a known code falls back to an empty code.
#
# Returns the URL as a String.
def create_search_uri(search_type, search_term)
  if search_type == SEARCH_BY_ISBN
    return BASE_ISBN_SEARCH_URL % Library.canonicalise_ean(search_term) # isbn-13
  end

  # The fallback is applied to the looked-up value itself; previously the
  # `|| ""` was attached to a discarded expression and never took effect.
  search_type_code = {
    SEARCH_BY_AUTHORS => "ATH",
    SEARCH_BY_TITLE => "TTL",
    SEARCH_BY_KEYWORD => "WRD" # SEARCH_BY_PUBLISHER => 'PBL' # not implemented
  }.fetch(search_type, "")

  format(BASE_SEARCH_URL, search_type_code, CGI.escape(search_term))
end
115
-
116
# Downloads the page referenced by result[:url] and parses it as a book
# page, returning whatever parse_result_data returns.
def get_book_from_search_result(result)
  book_url = result[:url]
  log.debug { "Fetching book from #{book_url}" }
  page = transport.get_response(URI.parse(book_url))
  parse_result_data(page.body)
end
121
-
122
# Extracts the hits from a Barnes & Noble search result page.
#
# html - the page source.
#
# Returns an Array of Hashes with the keys :title and :url, one per
# "book-container" div, taken from the first link inside the div's h2.
# Parsing is best-effort: the first error stops the scan, is logged as a
# warning, and the hits collected so far are returned.
def parse_search_result_data(html)
  doc = html_to_doc(html)
  found = []
  begin
    (doc / 'div[@class*="book-container"]').each do |container|
      # img = container % 'div.book-image/a/img'
      # image url would be img['src'] if img
      first_link = ((container % "h2") / "a").first
      found << { title: first_link.inner_text, url: first_link["href"] }
    end
  rescue StandardError => ex
    trace = ex.backtrace.join("\n> ")
    log.warn do
      "Failed parsing search results for Barnes & Noble " \
        "#{ex.message} #{trace}"
    end
  end
  found
end
147
-
148
# Parses a Barnes & Noble book page.
#
# html          - the page source.
# _search_isbn  - unused.
# _recursing    - unused.
#
# Reads ISBN-13, publisher and publication year from the first <dl>, title
# and cover image from the og:title / og:image <meta> tags, authors from
# "span.contributors a" and the binding from the selected format tab.
#
# Returns a two-element Array: the Book and the cover image URL (or nil).
#
# Raises NoResultsError when the page cannot be parsed; the underlying
# error is logged as a warning first.
def parse_result_data(html, _search_isbn = nil, _recursing = false)
  doc = html_to_doc(html)
  begin
    book_data = {}

    dl = (doc / "dl").first
    dl.children_of_type("dt").each do |dt|
      value = dt.next_sibling.inner_text
      case dt.inner_text
      when /ISBN-13/
        book_data[:isbn] = Library.canonicalise_ean(value)
      when /Publisher/
        book_data[:publisher] = value
      when /Publication dat[ae]/
        # Accept "date" as well as the historical "data" spelling, and store
        # the year under its own key: it used to overwrite :publisher while
        # :publication_year (read by Book.new below) was never set.
        # An unparseable date is skipped instead of failing the whole page.
        year = value[/\d{2}.\d{2}.(\d{4})/, 1]
        book_data[:publication_year] = year.to_i if year
      end
    end

    (doc / "meta").each do |meta|
      attrs = meta.attributes
      case attrs["property"]
      when "og:title"
        book_data[:title] = attrs["content"]
      when "og:image"
        book_data[:image_url] = attrs["content"]
      end
    end

    book_data[:authors] = (doc / "span.contributors a").map(&:inner_text)

    selected_format = (doc / "#availableFormats li.selected a.tabTitle").first
    book_data[:binding] = selected_format ? selected_format.inner_text : ""

    book = Book.new(book_data[:title], book_data[:authors],
                    book_data[:isbn], book_data[:publisher],
                    book_data[:publication_year],
                    book_data[:binding])
    [book, book_data[:image_url]]
  rescue StandardError => ex
    raise ex if ex.instance_of? NoResultsError

    trace = ex.backtrace.join("\n> ")
    log.warn do
      "Failed parsing search results for BarnesAndNoble " \
        "#{ex.message} #{trace}"
    end
    raise NoResultsError
  end
end
207
- end
208
- end
209
- end
@@ -1,176 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # This file is part of Alexandria.
4
- #
5
- # See the file README.md for authorship and licensing information.
6
-
7
- # New Proxis provider, taken from Palatina MetaDataSource and modified
8
- # for Alexandria. (20 Dec 2009)
9
-
10
- require "cgi"
11
- require "alexandria/book_providers/web"
12
-
13
- module Alexandria
14
- class BookProviders
15
- class ProxisProvider < WebsiteBasedProvider
16
- # include GetText
17
- include Logging
18
- # GetText.bindtextdomain(Alexandria::TEXTDOMAIN, :charset => "UTF-8")
19
-
20
- # Proxis essentially has three book databases, NL, FR and EN.
21
- # Currently, this provider only searches the NL database, since
22
- # it adds most to Alexandria (Amazon already has French and
23
- # English titles).
24
-
25
- SITE = "http://www.proxis.nl"
26
- BASE_SEARCH_URL = "#{SITE}/NLNL/Search/IndexGSA.aspx?search=%s" \
27
- "&shop=100001NL&SelRubricLevel1Id=100001NL"
28
- ISBN_REDIRECT_BASE_URL = "#{SITE}/NLNL/Search/Index.aspx?search=%s" \
29
- "&shop=100001NL&SelRubricLevel1Id=100001NL"
30
-
31
# Sets up the Proxis provider: passes "Proxis" and "Proxis (Belgium)" to the
# superclass constructor and reads the provider preferences.
def initialize
  short_label = "Proxis"
  long_label = "Proxis (Belgium)"
  super(short_label, long_label)
  # A language preference was sketched here but is disabled:
  # prefs.add("lang", _("Language"), "fr",
  #           LANGUAGES.keys)
  prefs.read
end
37
-
38
# Runs a search against Proxis.
#
# criterion - the search term (an ISBN for SEARCH_BY_ISBN).
# type      - one of the SEARCH_BY_* constants.
#
# Returns the parsed book for the first hit when searching by ISBN, or an
# Array with the parsed book of every hit otherwise.
#
# Raises NoResultsError when the result page contains no hits.
def search(criterion, type)
  request_uri = create_search_uri(type, criterion)
  log.debug { request_uri }
  page = transport.get_response(URI.parse(request_uri))

  hits = parse_search_result_data(page.body)
  raise NoResultsError if hits.empty?

  return get_book_from_search_result(hits.first) if type == SEARCH_BY_ISBN

  hits.map { |hit| get_book_from_search_result(hit) }
end
52
-
53
# Builds the Proxis search URL from BASE_SEARCH_URL. An ISBN search inserts
# the canonicalised EAN; every other search inserts the CGI-escaped term.
def create_search_uri(search_type, search_term)
  query = if search_type == SEARCH_BY_ISBN
            Library.canonicalise_ean(search_term)
          else
            CGI.escape(search_term)
          end
  BASE_SEARCH_URL % query
end
60
-
61
# Downloads the page referenced by result[:lookup_url] and parses it as a
# book page, returning whatever parse_result_data returns.
def get_book_from_search_result(result)
  lookup_url = result[:lookup_url]
  log.debug { "Fetching book from #{lookup_url}" }
  page = transport.get_response(URI.parse(lookup_url))
  parse_result_data(page.body)
end
66
-
67
# Returns the Proxis URL that redirects to +book+ by its ISBN, or nil when
# the book has no ISBN.
#
# The guard used to be inverted: a URL was only built when the ISBN was nil
# or empty, so books with an ISBN never got a URL.
def url(book)
  isbn = book.isbn
  return nil if isbn.nil? || isbn.empty?

  ISBN_REDIRECT_BASE_URL % Library.canonicalise_ean(isbn)
end
72
-
73
- ## from Palatina
74
# Returns the text of +node+ (helper taken from Palatina).
#
# A text node yields its to_html. An element yields the concatenated text of
# its children, stripped and with runs of spaces squeezed to one. nil, an
# element whose children are nil, and any other kind of node yield nil.
def text_of(node)
  return nil if node.nil?
  return node.to_html if node.text?
  return nil unless node.elem?

  kids = node.children
  return nil if kids.nil?

  joined = kids.map { |kid| text_of(kid) }.join
  joined.strip.squeeze(" ")
end
88
-
89
# Extracts the hits from a Proxis search result page.
#
# html - the page source.
#
# Returns an Array of Hashes with the keys :title and :lookup_url, one per
# "table.searchResult tr" row that contains an "h5 a" title link with an
# href. Relative hrefs are prefixed with SITE.
#
# Rows without a usable title link are skipped. They used to be added as
# empty hashes, which made the caller's "no results" check pass and then
# fail later when it tried to fetch a nil URL.
def parse_search_result_data(html)
  doc = html_to_doc(html)
  book_search_results = []
  doc.search("table.searchResult tr").each do |row|
    title_link = row % "h5 a"
    next unless title_link

    href = title_link["href"]
    next if href.nil? || href.empty?

    book_search_results << {
      title: text_of(title_link),
      lookup_url: href.start_with?("http") ? href : "#{SITE}#{href}"
    }
  end
  book_search_results
end
110
-
111
# Returns the text of the first "td" found under the parent of +header+
# (via text_of), or nil when the parent has no "td".
def data_for_header(header)
  cell = header.parent.at("td")
  cell ? text_of(cell) : nil
end
116
-
117
# Parses a Proxis book page.
#
# html - the page source.
#
# Reads the title from the first span of "div.detailBlock h3" (dropping a
# trailing "-"), and ISBN/EAN, binding ("Type"), publication year
# ("Verschijningsdatum"), authors ("Auteur") and publisher ("Uitgever")
# from the "table.productInfoTable" header rows. The cover is taken from
# the img whose id ends in "imgProduct", unless it is the "ProductNoCover"
# placeholder.
#
# Missing pieces are tolerated: an absent title block, an unparseable date,
# a missing ISBN row or a cover image without src leave the corresponding
# field nil instead of raising.
#
# Returns a two-element Array: the Book and the cover image URL (or nil).
def parse_result_data(html)
  doc = html_to_doc(html)
  book_data = { authors: [] }

  # TITLE
  title_header = doc.search("div.detailBlock h3").first
  if title_header
    title = text_of(title_header.search("span").first)
    title = Regexp.last_match(1).strip if title =~ /(.+)-$/
    book_data[:title] = title
  end

  # Product details: a single pass over the table headers.
  isbns = []
  doc.search("table.productInfoTable th").each do |th|
    header_text = th.inner_text
    isbns << data_for_header(th) if /(ISBN|EAN)/.match?(header_text)

    case header_text
    when /Type/
      book_data[:binding] = data_for_header(th)
    when /Verschijningsdatum/
      # the year is the four digits following a "/" in the date
      year = data_for_header(th).to_s[%r{/(\d{4})}, 1]
      book_data[:publish_year] = year.to_i if year
    when /Auteur/
      author = data_for_header(th)
      book_data[:authors] << author if author
    when /Uitgever/
      book_data[:publisher] = data_for_header(th)
    end
  end
  first_isbn = isbns.compact.first
  book_data[:isbn] = Library.canonicalise_ean(first_isbn) if first_isbn

  # COVER
  image_url = nil
  cover_img = doc.at("img[@id$='imgProduct']")
  cover_src = cover_img && cover_img["src"]
  if cover_src
    image_url = if cover_src.start_with?("http")
                  cover_src
                else
                  "#{SITE}/#{cover_src}" # TODO: use html <base>
                end
    image_url = nil if /ProductNoCover/.match?(image_url)
  end

  book = Book.new(book_data[:title], book_data[:authors],
                  book_data[:isbn], book_data[:publisher],
                  book_data[:publish_year], book_data[:binding])
  [book, image_url]
end
174
- end
175
- end
176
- end
@@ -1,256 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # Copyright (C) 2004 Laurent Sansonetti
4
- # Copyright (C) 2007 Laurent Sansonetti and Marco Costantini
5
- # Copyright (C) 2009 Cathal Mc Ginley
6
- # Copyright (C) 2011, 2014, 2016 Matijs van Zuijlen
7
- #
8
- # Alexandria is free software; you can redistribute it and/or
9
- # modify it under the terms of the GNU General Public License as
10
- # published by the Free Software Foundation; either version 2 of the
11
- # License, or (at your option) any later version.
12
- #
13
- # Alexandria is distributed in the hope that it will be useful,
14
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
- # General Public License for more details.
17
- #
18
- # You should have received a copy of the GNU General Public
19
- # License along with Alexandria; see the file COPYING. If not,
20
- # write to the Free Software Foundation, Inc., 51 Franklin Street,
21
- # Fifth Floor, Boston, MA 02110-1301 USA.
22
-
23
- # Adapted code from 'bn.rb' (I hope that it works!)
24
-
25
- # Almost completely rewritten by Cathal Mc Ginley (21 Feb 2009)
26
- # based on the new code for Palatina
27
-
28
- require "net/http"
29
- require "cgi"
30
- require "alexandria/book_providers/web"
31
-
32
- module Alexandria
33
- class BookProviders
34
- class SicilianoProvider < WebsiteBasedProvider
35
- include Logging
36
-
37
- SITE = "http://www.siciliano.com.br"
38
-
39
- # The string interpolations in this URL are the search term and search
40
- # type, respectively.
41
- BASE_SEARCH_URL = "#{SITE}/pesquisaweb/pesquisaweb.dll/pesquisa?" \
42
- "&FIL_ID=102" \
43
- "&PALAVRASN1=%s" \
44
- "&FILTRON1=%s" \
45
- "&ESTRUTN1=0301&ORDEMN2=E"
46
-
47
# Sets up the Siciliano provider: passes "Siciliano" and
# "Livraria Siciliano (Brasil)" to the superclass constructor and reads the
# provider preferences (none are declared for the moment).
def initialize
  short_label = "Siciliano"
  long_label = "Livraria Siciliano (Brasil)"
  super(short_label, long_label)
  prefs.read
end
52
-
53
# Downloads the product page referenced by result[:url] and parses it,
# passing the search hit along to parse_result_data. Returns whatever
# parse_result_data returns.
def get_book_from_search_result(result)
  product_url = result[:url]
  log.info { "Fetching book from #{product_url}" }
  page_source = transport.get(URI.parse(product_url))
  parse_result_data(page_source, result)
end
58
-
59
# Runs a search against Siciliano.
#
# criterion - the search term; it is re-encoded to ISO-8859-1 first.
# type      - one of the SEARCH_BY_* constants.
#
# Returns the parsed book for the first hit when searching by ISBN, or an
# Array with the parsed book of every hit otherwise.
#
# An ISBN search that finds nothing is retried exactly once; the retry flag
# is passed on to create_search_uri. NoResultsError is raised when that
# second attempt, or any non-ISBN search, finds nothing.
def search(criterion, type)
  criterion = criterion.encode("ISO-8859-1") # still needed??
  isbn_search = (type == SEARCH_BY_ISBN)
  retried = false
  begin
    request_uri = create_search_uri(type, criterion, retried)
    log.debug { "#{name} #{retried ? 'retrying ' : ''}request = #{request_uri}" }
    hits = parse_search_result_data(transport.get(URI.parse(request_uri)))
    raise NoResultsError if hits.empty?

    if isbn_search
      get_book_from_search_result(hits.first)
    else
      hits.map { |hit| get_book_from_search_result(hit) }
    end
  rescue NoResultsError => ex
    # only an ISBN search gets a second chance, and only one
    raise ex if retried || !isbn_search

    retried = true
    retry
  end
end
83
-
84
# Always returns nil: the new Siciliano website no longer has direct links
# to books by their ISBN (the permalink now seems to be based on the
# product id), so no URL can be derived from a book.
def url(_book); end
89
-
90
- private
91
-
92
# Builds the Siciliano search URL from BASE_SEARCH_URL.
#
# search_type  - one of the SEARCH_BY_* constants; an unknown type is
#                searched with the keyword code "X".
# search_term  - the ISBN or the free-text term.
# trying_again - for ISBN searches only: false searches by ISBN-13, true
#                (the second attempt) searches by ISBN-10.
#
# Returns the URL as a String.
def create_search_uri(search_type, search_term, trying_again = false)
  # The "X" fallback is applied to the looked-up value itself; previously
  # the `|| "X"` was attached to a discarded expression, so an unknown type
  # produced an empty filter code.
  search_type_code = {
    SEARCH_BY_ISBN => "G",
    SEARCH_BY_TITLE => "A",
    SEARCH_BY_AUTHORS => "B",
    SEARCH_BY_KEYWORD => "X"
  }.fetch(search_type, "X")

  search_term_encoded =
    if search_type != SEARCH_BY_ISBN
      CGI.escape(search_term)
    elsif trying_again
      # on second attempt, try ISBN-10...
      Library.canonicalise_isbn(search_term) # isbn-10
    else
      # search by ISBN-13 first
      Library.canonicalise_ean(search_term) # isbn-13
    end

  format(BASE_SEARCH_URL, search_term_encoded, search_type_code)
end
111
-
112
# Extracts the hits from a Siciliano search result page.
#
# html - the page source.
#
# Returns an Array of Hashes, one per "div.pesquisa-item-lista-conteudo"
# element, with the keys :title and :url and, when present, :author and
# :publisher. Author and publisher come from the item's first non-blank
# text node, split on "/". Title and URL come from the item's first link;
# the href is appended to SITE.
#
# Parsing is best-effort per item: an item that cannot be parsed is logged
# as an error and left out.
#
# NOTE(review): an older layout sketch in this method described
# td.normal / span.vitrine_nome_produto markup, which the selector used
# here does not reference — confirm against the live page.
def parse_search_result_data(html)
  doc = html_to_doc(html)
  book_search_results = []

  doc.search("div.pesquisa-item-lista-conteudo").each do |item|
    result = {}

    # author & publisher: first text node that is not just whitespace.
    # This is computed without mutating strings in place; calling strip! on
    # the frozen "" literal raised FrozenError whenever the first child was
    # not a text node, and the whole item was lost.
    author_publisher = ""
    item.children.each do |node|
      next unless node.text?

      author_publisher = node.to_s.strip
      break unless author_publisher.empty?
    end
    author, publisher = author_publisher.split("/")
    result[:author] = author.strip if author
    result[:publisher] = publisher.strip if publisher

    # title & url
    link = item % "a"
    result[:title] = link.inner_text.strip
    link_to_description = link["href"]
    slash = link_to_description.start_with?("/") ? "" : "/"
    result[:url] = "#{SITE}#{slash}#{link_to_description}"

    book_search_results << result
  rescue StandardError => ex
    trace = ex.backtrace.join("\n> ")
    log.error { "Failed parsing Siciliano search page #{ex.message}\n#{trace}" }
  end

  book_search_results
end
157
-
158
# Parses a Siciliano product page (checked against the website on
# 21 Feb 2009).
#
# html          - the page source.
# search_result - the search hit that led here; its :publisher is used as
#                 the book's publisher.
#
# Title and authors come from "div#conteudo//div.titulo"; ISBN, translator
# (appended to the authors), binding and edition come from the
# "Key: value" lines of "div#tab-caracteristica". The publication year is
# the first four-digit number starting with 1 or 2 in the edition text. The
# cover is the first ImgSrc[n]="..." assignment found in a script element.
#
# Returns a two-element Array: the Book and the cover image URL (or nil).
# Returns nil, after logging an error, when anything goes wrong — this
# includes the NoResultsError raised below for a page without a title
# block, which is caught by this method's own rescue.
def parse_result_data(html, search_result)
  doc = html_to_doc(html)

  # title
  heading = doc % "div#conteudo//div.titulo"
  raise NoResultsError unless heading

  title_node = heading % "h2"
  title = title_node ? title_node.inner_text : nil

  # authors
  authors = []
  (heading / "h3.autor").each { |author_node| authors << author_node.inner_text.strip }

  # product characteristics
  details = string_array_to_map(lines_of_text_as_array(doc % "div#tab-caracteristica"))
  isbn = details["ISBN"]
  translator = details["Tradutor"]
  authors.push(translator) if translator
  cover_binding = details["Acabamento"]
  publisher = search_result[:publisher]

  # publication year; the "Edio" key is what string_array_to_map leaves of
  # the accented label once non-ASCII letters have been removed
  edition = details["Edio"]
  year_match = edition ? edition.match(/([12][0-9]{3})/) : nil
  publish_year = year_match ? year_match[1].to_i : nil

  # cover, e.g.
  # ImgSrc[1]="/imagem/imagem.dll?pro_id=1386929&PIM_Id=658849";
  cover_links = []
  (doc / "script").each do |script|
    script_children = script.children
    next if script_children.nil?

    script_children.each do |child|
      found = child.to_s.match(/ImgSrc\[\d\]="(.+)";/)
      cover_links << found[1] if found
    end
  end

  [Book.new(title, authors, isbn, publisher, publish_year, cover_binding),
   cover_links.first]
rescue StandardError => ex
  trace = ex.backtrace.join("\n> ")
  log.error { "Failed parsing Siciliano product page #{ex.message}\n#{trace}" }
  nil
end
210
-
211
# Returns the stripped content of the first child text node of +elem+ that
# is not blank, or "" when there is none.
def first_non_empty_text_node(elem)
  elem.children.each do |child|
    next unless child.text?

    stripped = child.to_s.strip
    return stripped unless stripped.empty?
  end
  ""
end
221
-
222
# Splits the content of +elem+ into lines of text, using its "br" children
# as line breaks.
#
# Text nodes contribute their to_s, other non-br children their
# inner_text. Each line is stripped and blank lines are dropped.
#
# Returns an Array of Strings.
def lines_of_text_as_array(elem)
  collected = []
  pending = ""
  elem.children.each do |child|
    if child.text?
      pending += child.to_s
    elsif child.name == "br"
      collected << pending.strip
      pending = ""
    else
      pending += child.inner_text
    end
  end
  collected << pending.strip # whatever follows the last br
  collected.reject(&:empty?)
end
239
-
240
# Turns an Array of "Key: value" strings into a Hash.
#
# arr - Array of Strings.
#
# Each string is split at its FIRST colon only, so values that contain a
# colon themselves are kept whole (they used to be cut at the second
# colon). Strings without a colon, or with nothing after it, are skipped.
# Keys are reduced to their ASCII letters; values are stripped.
#
# Returns the Hash of String keys to String values.
def string_array_to_map(arr)
  arr.each_with_object({}) do |str, map|
    key, val = str.split(":", 2)
    next if key.nil? || val.nil? || val.empty?

    # a real hack for not handling encoding properly :^)
    map[key.gsub(/[^a-zA-Z]/, "")] = val.strip
  end
end
249
-
250
- # def binding_type(binding) # portuguese string
251
- # {"brochura" => :paperback,
252
- # "encadernado" => :hardback}[binding.downcase] or :unknown
253
- # end
254
- end
255
- end
256
- end