alexandria-book-collection-manager 0.7.3 → 0.7.8

Sign up to get free protection for your applications and to get access to all the features.
Files changed (192) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +9 -0
  3. data/.github/workflows/ruby.yml +77 -0
  4. data/.gitignore +4 -1
  5. data/.rubocop.yml +86 -36
  6. data/.rubocop_todo.yml +58 -161
  7. data/.simplecov +5 -2
  8. data/CHANGELOG.md +56 -2
  9. data/Gemfile +4 -3
  10. data/INSTALL.md +23 -11
  11. data/README.md +52 -41
  12. data/Rakefile +78 -75
  13. data/alexandria-book-collection-manager.gemspec +50 -44
  14. data/bin/alexandria +12 -22
  15. data/doc/FAQ +1 -2
  16. data/doc/dependency_decisions.yml +27 -8
  17. data/lib/alexandria.rb +25 -23
  18. data/lib/alexandria/about.rb +50 -50
  19. data/lib/alexandria/book_providers.rb +86 -91
  20. data/lib/alexandria/book_providers/adlibris.rb +37 -74
  21. data/lib/alexandria/book_providers/amazon_aws.rb +94 -100
  22. data/lib/alexandria/book_providers/amazon_ecs_util.rb +289 -324
  23. data/lib/alexandria/book_providers/barnes_and_noble.rb +42 -42
  24. data/lib/alexandria/book_providers/douban.rb +25 -41
  25. data/lib/alexandria/book_providers/proxis.rb +34 -29
  26. data/lib/alexandria/book_providers/pseudomarc.rb +77 -85
  27. data/lib/alexandria/book_providers/siciliano.rb +60 -64
  28. data/lib/alexandria/book_providers/thalia_provider.rb +161 -0
  29. data/lib/alexandria/book_providers/web.rb +5 -5
  30. data/lib/alexandria/book_providers/worldcat.rb +66 -95
  31. data/lib/alexandria/book_providers/z3950.rb +153 -169
  32. data/lib/alexandria/config.rb +1 -1
  33. data/lib/alexandria/console.rb +3 -3
  34. data/lib/alexandria/default_preferences.rb +37 -0
  35. data/lib/alexandria/execution_queue.rb +13 -12
  36. data/lib/alexandria/export_format.rb +8 -8
  37. data/lib/alexandria/export_library.rb +128 -127
  38. data/lib/alexandria/import_library.rb +102 -126
  39. data/lib/alexandria/import_library_csv.rb +41 -41
  40. data/lib/alexandria/library_collection.rb +6 -5
  41. data/lib/alexandria/library_sort_order.rb +4 -2
  42. data/lib/alexandria/library_store.rb +39 -28
  43. data/lib/alexandria/logging.rb +10 -14
  44. data/lib/alexandria/models/book.rb +5 -4
  45. data/lib/alexandria/models/library.rb +63 -53
  46. data/lib/alexandria/net.rb +5 -6
  47. data/lib/alexandria/preferences.rb +66 -63
  48. data/lib/alexandria/scanners.rb +2 -2
  49. data/lib/alexandria/scanners/{cuecat.rb → cue_cat.rb} +17 -17
  50. data/lib/alexandria/scanners/keyboard.rb +8 -8
  51. data/lib/alexandria/smart_library.rb +110 -112
  52. data/lib/alexandria/ui.rb +15 -15
  53. data/lib/alexandria/ui/{dialogs/about_dialog.rb → about_dialog.rb} +2 -2
  54. data/lib/alexandria/ui/{dialogs/acquire_dialog.rb → acquire_dialog.rb} +108 -109
  55. data/lib/alexandria/ui/alert_dialog.rb +66 -0
  56. data/lib/alexandria/ui/{dialogs/bad_isbns_dialog.rb → bad_isbns_dialog.rb} +13 -9
  57. data/lib/alexandria/ui/{dialogs/barcode_animation.rb → barcode_animation.rb} +16 -15
  58. data/lib/alexandria/ui/{dialogs/book_properties_dialog.rb → book_properties_dialog.rb} +25 -38
  59. data/lib/alexandria/ui/{dialogs/book_properties_dialog_base.rb → book_properties_dialog_base.rb} +64 -157
  60. data/lib/alexandria/ui/builder_base.rb +1 -1
  61. data/lib/alexandria/ui/calendar_popup.rb +58 -0
  62. data/lib/alexandria/ui/callbacks.rb +187 -155
  63. data/lib/alexandria/ui/completion_models.rb +8 -22
  64. data/lib/alexandria/ui/confirm_erase_dialog.rb +33 -0
  65. data/lib/alexandria/ui/conflict_while_copying_dialog.rb +34 -0
  66. data/lib/alexandria/ui/dndable.rb +7 -7
  67. data/lib/alexandria/ui/error_dialog.rb +25 -0
  68. data/lib/alexandria/ui/{dialogs/export_dialog.rb → export_dialog.rb} +37 -58
  69. data/lib/alexandria/ui/icons.rb +38 -43
  70. data/lib/alexandria/ui/iconview.rb +12 -10
  71. data/lib/alexandria/ui/iconview_tooltips.rb +41 -54
  72. data/lib/alexandria/ui/import_dialog.rb +157 -0
  73. data/lib/alexandria/ui/init.rb +30 -41
  74. data/lib/alexandria/ui/{dialogs/keep_bad_isbn_dialog.rb → keep_bad_isbn_dialog.rb} +9 -6
  75. data/lib/alexandria/ui/libraries_combo.rb +15 -14
  76. data/lib/alexandria/ui/listview.rb +69 -67
  77. data/lib/alexandria/ui/main_app.rb +24 -26
  78. data/lib/alexandria/ui/misc_dialogs.rb +10 -0
  79. data/lib/alexandria/ui/multi_drag_treeview.rb +8 -9
  80. data/lib/alexandria/ui/{dialogs/new_book_dialog.rb → new_book_dialog.rb} +113 -114
  81. data/lib/alexandria/ui/{dialogs/new_book_dialog_manual.rb → new_book_dialog_manual.rb} +22 -19
  82. data/lib/alexandria/ui/new_provider_dialog.rb +100 -0
  83. data/lib/alexandria/ui/new_smart_library_dialog.rb +74 -0
  84. data/lib/alexandria/ui/preferences_dialog.rb +313 -0
  85. data/lib/alexandria/ui/provider_preferences_base_dialog.rb +95 -0
  86. data/lib/alexandria/ui/provider_preferences_dialog.rb +35 -0
  87. data/lib/alexandria/ui/{dialogs/misc_dialogs.rb → really_delete_dialog.rb} +7 -28
  88. data/lib/alexandria/ui/{sidepane.rb → sidepane_manager.rb} +53 -48
  89. data/lib/alexandria/ui/skip_entry_dialog.rb +33 -0
  90. data/lib/alexandria/ui/smart_library_properties_dialog.rb +60 -0
  91. data/lib/alexandria/ui/smart_library_properties_dialog_base.rb +242 -0
  92. data/lib/alexandria/ui/smart_library_rule_box.rb +119 -0
  93. data/lib/alexandria/ui/sound.rb +11 -13
  94. data/lib/alexandria/ui/ui_manager.rb +216 -200
  95. data/lib/alexandria/version.rb +4 -19
  96. data/lib/alexandria/web_themes.rb +21 -21
  97. data/po/Makefile +2 -2
  98. data/po/cs.po +992 -875
  99. data/po/cy.po +961 -874
  100. data/po/de.po +990 -865
  101. data/po/el.po +989 -865
  102. data/po/es.po +985 -861
  103. data/po/fr.po +987 -867
  104. data/po/ga.po +908 -820
  105. data/po/gl.po +980 -860
  106. data/po/it.po +986 -864
  107. data/po/ja.po +969 -849
  108. data/po/mk.po +984 -860
  109. data/po/nb.po +979 -859
  110. data/po/nl.po +983 -860
  111. data/po/pl.po +1018 -971
  112. data/po/pt.po +988 -857
  113. data/po/pt_BR.po +983 -863
  114. data/po/ru.po +994 -871
  115. data/po/sk.po +989 -867
  116. data/po/sv.po +976 -856
  117. data/po/uk.po +972 -858
  118. data/po/zh_TW.po +974 -854
  119. data/schemas/alexandria.schemas +24 -2
  120. data/share/alexandria/glade/acquire_dialog__builder.glade +1 -1
  121. data/share/alexandria/glade/book_properties_dialog__builder.glade +1 -1
  122. data/share/alexandria/glade/main_app__builder.glade +6 -21
  123. data/share/alexandria/glade/new_book_dialog__builder.glade +1 -1
  124. data/share/alexandria/glade/preferences_dialog__builder.glade +1 -1
  125. data/share/gnome/help/alexandria/C/introduction.xml +0 -4
  126. data/share/gnome/help/alexandria/C/searching.xml +1 -1
  127. data/share/gnome/help/alexandria/C/smart-libraries.xml +2 -2
  128. data/share/gnome/help/alexandria/C/working-with-libraries.xml +1 -1
  129. data/share/gnome/help/alexandria/fr/alexandria.xml +1 -1
  130. data/share/gnome/help/alexandria/ja/introduction.xml +0 -4
  131. data/share/gnome/help/alexandria/ja/smart-libraries.xml +1 -1
  132. data/spec/alexandria/book_providers/thalia_provider_spec.rb +119 -0
  133. data/spec/alexandria/book_providers/world_cat_provider_spec.rb +160 -0
  134. data/spec/alexandria/book_providers_spec.rb +62 -156
  135. data/spec/alexandria/book_spec.rb +12 -10
  136. data/spec/alexandria/console_spec.rb +6 -11
  137. data/spec/alexandria/export_library_spec.rb +47 -58
  138. data/spec/alexandria/library_spec.rb +121 -109
  139. data/spec/alexandria/library_store_spec.rb +8 -8
  140. data/spec/alexandria/preferences_spec.rb +44 -17
  141. data/spec/alexandria/scanners/cue_cat_spec.rb +52 -0
  142. data/spec/alexandria/smart_library_spec.rb +15 -15
  143. data/spec/alexandria/ui/about_dialog_spec.rb +14 -0
  144. data/spec/alexandria/ui/acquire_dialog_spec.rb +14 -0
  145. data/spec/alexandria/ui/alert_dialog_spec.rb +16 -0
  146. data/spec/alexandria/ui/bad_isbns_dialog_spec.rb +14 -0
  147. data/spec/alexandria/ui/book_properties_dialog_spec.rb +59 -0
  148. data/spec/alexandria/ui/confirm_erase_dialog_spec.rb +14 -0
  149. data/spec/alexandria/ui/conflict_while_copying_dialog_spec.rb +16 -0
  150. data/spec/alexandria/ui/error_dialog_spec.rb +14 -0
  151. data/spec/alexandria/ui/export_dialog_spec.rb +36 -0
  152. data/spec/alexandria/ui/icons_spec.rb +26 -0
  153. data/spec/alexandria/ui/iconview_spec.rb +7 -21
  154. data/spec/alexandria/ui/import_dialog_spec.rb +46 -0
  155. data/spec/alexandria/ui/keep_bad_isbn_dialog_spec.rb +17 -0
  156. data/spec/alexandria/ui/main_app_spec.rb +7 -34
  157. data/spec/alexandria/ui/new_book_dialog_manual_spec.rb +51 -0
  158. data/spec/alexandria/ui/{dialogs/new_book_dialog_spec.rb → new_book_dialog_spec.rb} +4 -4
  159. data/spec/alexandria/ui/new_provider_dialog_spec.rb +30 -0
  160. data/spec/alexandria/ui/new_smart_library_dialog_spec.rb +39 -0
  161. data/spec/alexandria/ui/preferences_dialog_spec.rb +14 -0
  162. data/spec/alexandria/ui/provider_preferences_dialog_spec.rb +34 -0
  163. data/spec/alexandria/ui/really_delete_dialog_spec.rb +16 -0
  164. data/spec/alexandria/ui/sidepane_manager_spec.rb +15 -0
  165. data/spec/alexandria/ui/skip_entry_dialog_spec.rb +14 -0
  166. data/spec/alexandria/ui/smart_library_properties_dialog_spec.rb +49 -0
  167. data/spec/alexandria/ui/sound_spec.rb +2 -2
  168. data/spec/alexandria/ui/ui_manager_spec.rb +44 -20
  169. data/spec/end_to_end/basic_run_spec.rb +21 -38
  170. data/spec/fixtures/cover.jpg +0 -0
  171. data/spec/spec_helper.rb +54 -10
  172. data/tasks/setup.rb +2 -2
  173. data/tasks/spec.rake +11 -11
  174. data/util/rake/fileinstall.rb +38 -35
  175. data/util/rake/gettextgenerate.rb +7 -7
  176. data/util/rake/omfgenerate.rb +7 -7
  177. metadata +158 -45
  178. data/dogtail/basic_run_test.py +0 -9
  179. data/lib/alexandria/book_providers/renaud.rb +0 -155
  180. data/lib/alexandria/book_providers/thalia.rb +0 -198
  181. data/lib/alexandria/ui/dialogs/alert_dialog.rb +0 -63
  182. data/lib/alexandria/ui/dialogs/import_dialog.rb +0 -176
  183. data/lib/alexandria/ui/dialogs/new_smart_library_dialog.rb +0 -62
  184. data/lib/alexandria/ui/dialogs/preferences_dialog.rb +0 -563
  185. data/lib/alexandria/ui/dialogs/smart_library_properties_dialog.rb +0 -61
  186. data/lib/alexandria/ui/dialogs/smart_library_properties_dialog_base.rb +0 -423
  187. data/spec/alexandria/scanners/cuecat_spec.rb +0 -67
  188. data/spec/alexandria/ui/dialogs_spec.rb +0 -162
  189. data/spec/alexandria/ui/sidepane_spec.rb +0 -29
  190. data/spec/alexandria/ui/ui_utilities_spec.rb +0 -62
  191. data/spec/alexandria/utilities_spec.rb +0 -52
  192. data/tasks/dogtail.rake +0 -6
@@ -25,27 +25,27 @@
25
25
  # Almost completely rewritten by Cathal Mc Ginley (21 Feb 2009)
26
26
  # based on the new code for Palatina
27
27
 
28
- require 'net/http'
29
- require 'cgi'
30
- require 'alexandria/book_providers/web'
28
+ require "net/http"
29
+ require "cgi"
30
+ require "alexandria/book_providers/web"
31
31
 
32
32
  module Alexandria
33
33
  class BookProviders
34
34
  class SicilianoProvider < WebsiteBasedProvider
35
35
  include Logging
36
36
 
37
- SITE = 'http://www.siciliano.com.br'
37
+ SITE = "http://www.siciliano.com.br"
38
38
 
39
39
  # The string interpolations in this URL are the search term and search
40
40
  # type, respectively.
41
41
  BASE_SEARCH_URL = "#{SITE}/pesquisaweb/pesquisaweb.dll/pesquisa?" \
42
- '&FIL_ID=102' \
43
- '&PALAVRASN1=%s' \
44
- '&FILTRON1=%s' \
45
- '&ESTRUTN1=0301&ORDEMN2=E'
42
+ "&FIL_ID=102" \
43
+ "&PALAVRASN1=%s" \
44
+ "&FILTRON1=%s" \
45
+ "&ESTRUTN1=0301&ORDEMN2=E"
46
46
 
47
47
  def initialize
48
- super('Siciliano', 'Livraria Siciliano (Brasil)')
48
+ super("Siciliano", "Livraria Siciliano (Brasil)")
49
49
  # no preferences for the moment
50
50
  prefs.read
51
51
  end
@@ -57,7 +57,7 @@ module Alexandria
57
57
  end
58
58
 
59
59
  def search(criterion, type)
60
- criterion = criterion.encode('ISO-8859-1') # still needed??
60
+ criterion = criterion.encode("ISO-8859-1") # still needed??
61
61
  trying_again = false
62
62
  begin
63
63
  req = create_search_uri(type, criterion, trying_again)
@@ -71,13 +71,13 @@ module Alexandria
71
71
  else
72
72
  results.map { |result| get_book_from_search_result(result) }
73
73
  end
74
- rescue NoResultsError => err
74
+ rescue NoResultsError => ex
75
75
  if (type == SEARCH_BY_ISBN) && (trying_again == false)
76
76
  trying_again = true
77
77
  retry
78
- else
79
- raise err
80
78
  end
79
+
80
+ raise ex
81
81
  end
82
82
  end
83
83
 
@@ -90,10 +90,10 @@ module Alexandria
90
90
  private
91
91
 
92
92
  def create_search_uri(search_type, search_term, trying_again = false)
93
- (search_type_code = { SEARCH_BY_ISBN => 'G',
94
- SEARCH_BY_TITLE => 'A',
95
- SEARCH_BY_AUTHORS => 'B',
96
- SEARCH_BY_KEYWORD => 'X' }[search_type]) || 'X'
93
+ (search_type_code = { SEARCH_BY_ISBN => "G",
94
+ SEARCH_BY_TITLE => "A",
95
+ SEARCH_BY_AUTHORS => "B",
96
+ SEARCH_BY_KEYWORD => "X" }[search_type]) || "X"
97
97
  search_term_encoded = if search_type == SEARCH_BY_ISBN
98
98
  if trying_again
99
99
  # on second attempt, try ISBN-10...
@@ -123,35 +123,33 @@ module Alexandria
123
123
  book_search_results = []
124
124
  # each result will be a dict with keys :title, :author, :publisher, :url
125
125
 
126
- list_items = doc.search('div.pesquisa-item-lista-conteudo')
126
+ list_items = doc.search("div.pesquisa-item-lista-conteudo")
127
127
  list_items.each do |item|
128
- begin
129
- result = {}
128
+ result = {}
130
129
 
131
- # author & publisher
132
- author_publisher = ''
133
- item.children.each do |node|
134
- author_publisher += node.to_s if node.text?
135
- author_publisher.strip!
136
- break unless author_publisher.empty?
137
- end
138
- author, publisher = author_publisher.split('/')
139
- result[:author] = author.strip if author
140
- result[:publisher] = publisher.strip if publisher
130
+ # author & publisher
131
+ author_publisher = ""
132
+ item.children.each do |node|
133
+ author_publisher += node.to_s if node.text?
134
+ author_publisher.strip!
135
+ break unless author_publisher.empty?
136
+ end
137
+ author, publisher = author_publisher.split("/")
138
+ result[:author] = author.strip if author
139
+ result[:publisher] = publisher.strip if publisher
141
140
 
142
- # title & url
143
- link = item % 'a'
144
- result[:title] = link.inner_text.strip
145
- link_to_description = link['href']
146
- slash = ''
147
- slash = '/' unless link_to_description =~ /^\//
148
- result[:url] = "#{SITE}#{slash}#{link_to_description}"
141
+ # title & url
142
+ link = item % "a"
143
+ result[:title] = link.inner_text.strip
144
+ link_to_description = link["href"]
145
+ slash = ""
146
+ slash = "/" unless link_to_description.start_with?("/")
147
+ result[:url] = "#{SITE}#{slash}#{link_to_description}"
149
148
 
150
- book_search_results << result
151
- rescue StandardError => ex
152
- trace = ex.backtrace.join("\n> ")
153
- log.error { "Failed parsing Siciliano search page #{ex.message}\n#{trace}" }
154
- end
149
+ book_search_results << result
150
+ rescue StandardError => ex
151
+ trace = ex.backtrace.join("\n> ")
152
+ log.error { "Failed parsing Siciliano search page #{ex.message}\n#{trace}" }
155
153
  end
156
154
 
157
155
  book_search_results
@@ -161,51 +159,49 @@ module Alexandria
161
159
  # checked against Siciliano website 21 Feb 2009
162
160
  doc = html_to_doc(html)
163
161
  # title
164
- title_div = doc % 'div#conteudo//div.titulo'
162
+ title_div = doc % "div#conteudo//div.titulo"
165
163
  raise NoResultsError unless title_div
166
164
 
167
- title_h = title_div % 'h2'
165
+ title_h = title_div % "h2"
168
166
  title = title_h.inner_text if title_h
169
167
  # title = first_non_empty_text_node(title_div)
170
168
  # author_spans = doc/'span.rotulo'
171
- author_hs = title_div / 'h3.autor'
169
+ author_hs = title_div / "h3.autor"
172
170
  authors = []
173
171
  author_hs.each do |h|
174
172
  authors << h.inner_text.strip
175
173
  end
176
174
  ## synopsis_div = doc % 'div#sinopse'
177
- details_div = doc % 'div#tab-caracteristica'
175
+ details_div = doc % "div#tab-caracteristica"
178
176
  details = string_array_to_map(lines_of_text_as_array(details_div))
179
177
  # ISBN
180
- isbn = details['ISBN']
178
+ isbn = details["ISBN"]
181
179
  ## ean = details["CdBarras"]
182
- translator = details['Tradutor']
180
+ translator = details["Tradutor"]
183
181
  authors << translator if translator
184
- binding = details['Acabamento']
182
+ binding = details["Acabamento"]
185
183
  publisher = search_result[:publisher]
186
184
  # publish year
187
185
  publish_year = nil
188
- edition = details['Edio']
189
- if edition
190
- publish_year = Regexp.last_match[1].to_i if edition =~ /([12][0-9]{3})/ # publication date
191
- end
186
+ edition = details["Edio"]
187
+ # publication date
188
+ publish_year = Regexp.last_match[1].to_i if edition && edition =~ /([12][0-9]{3})/
192
189
  # cover
193
190
  # ImgSrc[1]="/imagem/imagem.dll?pro_id=1386929&PIM_Id=658849";
194
191
  image_urls = []
195
- (doc / 'script').each do |script|
192
+ (doc / "script").each do |script|
196
193
  next if script.children.nil?
197
194
 
198
195
  script.children.each do |ch|
199
196
  ch_text = ch.to_s
200
- if ch_text =~ /ImgSrc\[[\d]\]="(.+)";/
197
+ if ch_text =~ /ImgSrc\[\d\]="(.+)";/
201
198
  img_link = Regexp.last_match[1]
202
199
  image_urls << img_link
203
200
  end
204
201
  end
205
202
  end
206
203
  book = Book.new(title, authors, isbn, publisher, publish_year, binding)
207
- result = [book, image_urls.first]
208
- result
204
+ [book, image_urls.first]
209
205
  rescue StandardError => ex
210
206
  trace = ex.backtrace.join("\n> ")
211
207
  log.error { "Failed parsing Siciliano product page #{ex.message}\n#{trace}" }
@@ -213,7 +209,7 @@ module Alexandria
213
209
  end
214
210
 
215
211
  def first_non_empty_text_node(elem)
216
- text = ''
212
+ text = ""
217
213
  elem.children.each do |node|
218
214
  next unless node.text?
219
215
 
@@ -225,28 +221,28 @@ module Alexandria
225
221
 
226
222
  def lines_of_text_as_array(elem)
227
223
  lines = []
228
- current_text = ''
224
+ current_text = ""
229
225
  elem.children.each do |e|
230
226
  if e.text?
231
227
  current_text += e.to_s
232
- elsif e.name == 'br'
228
+ elsif e.name == "br"
233
229
  lines << current_text.strip
234
- current_text = ''
230
+ current_text = ""
235
231
  else
236
232
  current_text += e.inner_text
237
233
  end
238
234
  end
239
235
  lines << current_text.strip
240
- lines.delete('')
236
+ lines.delete("")
241
237
  lines
242
238
  end
243
239
 
244
240
  def string_array_to_map(arr)
245
241
  map = {}
246
242
  arr.each do |str|
247
- key, val = str.split(':')
243
+ key, val = str.split(":")
248
244
  # a real hack for not handling encoding properly :^)
249
- map[key.gsub(/[^a-zA-Z]/, '')] = val.strip if val
245
+ map[key.gsub(/[^a-zA-Z]/, "")] = val.strip if val
250
246
  end
251
247
  map
252
248
  end
@@ -0,0 +1,161 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This file is part of Alexandria.
4
+ #
5
+ # See the file README.md for authorship and licensing information.
6
+
7
+ # http://de.wikipedia.org/wiki/Thalia_%28Buchhandel%29
8
+ # Thalia.de bought the Austrian book trade chain Amadeus
9
+
10
+ # New Thalia provider, taken from Palatina MetaDataSource and modified
11
+ # for Alexandria. (21 Dec 2009)
12
+
13
+ require "net/http"
14
+ require "cgi"
15
+ require "alexandria/book_providers/web"
16
+
17
+ module Alexandria
18
+ class BookProviders
19
+ class ThaliaProvider < WebsiteBasedProvider
20
+ include Logging
21
+
22
+ SITE = "https://www.thalia.de"
23
+ BASE_SEARCH_URL = "#{SITE}/shop/bde_bu_hg_startseite/suche/?%s=%s" # type,term
24
+
25
+ def initialize
26
+ super("Thalia", "Thalia (Germany)")
27
+ # no preferences for the moment
28
+ prefs.read
29
+ end
30
+
31
+ def url(book)
32
+ create_search_uri(SEARCH_BY_ISBN, book.isbn)
33
+ end
34
+
35
+ def search(criterion, type)
36
+ req = create_search_uri(type, criterion)
37
+ log.debug { req }
38
+ html_data = transport.get_response(URI.parse(req))
39
+ if type == SEARCH_BY_ISBN
40
+ parse_result_data(html_data.body, criterion)
41
+ else
42
+ results = parse_search_result_data(html_data.body)
43
+ raise NoResultsError if results.empty?
44
+
45
+ results.map { |result| get_book_from_search_result(result) }
46
+ end
47
+ end
48
+
49
+ def create_search_uri(search_type, search_term)
50
+ (search_type_code = {
51
+ SEARCH_BY_ISBN => "sq",
52
+ SEARCH_BY_AUTHORS => "sa", # Autor
53
+ SEARCH_BY_TITLE => "st", # Titel
54
+ SEARCH_BY_KEYWORD => "ssw" # Schlagwort
55
+ }[search_type]) || ""
56
+ search_type_code = CGI.escape(search_type_code)
57
+ search_term_encoded = if search_type == SEARCH_BY_ISBN
58
+ # search_term_encoded = search_term.as_isbn_13
59
+ Library.canonicalise_isbn(search_term) # check this!
60
+ else
61
+ CGI.escape(search_term)
62
+ end
63
+ format(BASE_SEARCH_URL, search_type_code, search_term_encoded)
64
+ end
65
+
66
+ def parse_search_result_data(html)
67
+ doc = html_to_doc(html)
68
+ book_search_results = []
69
+
70
+ results_items = doc / "ul.weitere-formate li.format"
71
+
72
+ results_items.each do |item|
73
+ result = {}
74
+ item_link = item % "a"
75
+ result[:lookup_url] = "#{SITE}#{item_link['href']}"
76
+ book_search_results << result
77
+ end
78
+ book_search_results
79
+ end
80
+
81
+ def data_from_label(node, label_text)
82
+ label_node = node % "th[text()*='#{label_text}']"
83
+ return "" unless label_node
84
+
85
+ item_node = label_node.parent % "td"
86
+ item_node.inner_text.strip
87
+ end
88
+
89
+ def get_book_from_search_result(result)
90
+ log.debug { "Fetching book from #{result[:lookup_url]}" }
91
+ html_data = transport.get_response(URI.parse(result[:lookup_url]))
92
+ parse_result_data(html_data.body, "noisbn", recursing: true)
93
+ end
94
+
95
+ def parse_result_data(html, isbn, recursing: false)
96
+ doc = html_to_doc(html)
97
+
98
+ results_divs = doc / "ul.weitere-formate"
99
+ unless results_divs.empty?
100
+ if recursing
101
+ # already recursing, avoid doing so endlessly second time
102
+ # around *should* lead to a book description, not a result
103
+ # list
104
+ return
105
+ end
106
+
107
+ # ISBN-lookup results in multiple results
108
+ results = parse_search_result_data(html)
109
+ chosen = results.first # fallback!
110
+ html_data = transport.get_response(URI.parse(chosen[:lookup_url]))
111
+ return parse_result_data(html_data.body, isbn, recursing: true)
112
+ end
113
+
114
+ begin
115
+ if (div = doc % "section#sbe-product-details")
116
+ title = div["data-titel"]
117
+
118
+ if (author_p = doc % "p.aim-author")
119
+ authors = []
120
+ author_links = author_p / :a
121
+ author_links.each do |a|
122
+ authors << a.inner_text.strip
123
+ end
124
+ end
125
+
126
+ item_details = doc % "section.artikeldetails"
127
+ isbns = []
128
+ isbns << data_from_label(item_details, "EAN")
129
+ isbns << data_from_label(item_details, "ISBN")
130
+ isbns.reject!(&:empty?)
131
+
132
+ year = nil
133
+ date = data_from_label(item_details, "Erscheinungsdatum")
134
+ year = Regexp.last_match[1].to_i if date =~ /(\d{4})/
135
+
136
+ book_binding = data_from_label(item_details, "Einband")
137
+
138
+ publisher = data_from_label(item_details, "Verlag")
139
+
140
+ book = Book.new(title, authors, isbns.first,
141
+ publisher, year, book_binding)
142
+
143
+ image_url = nil
144
+ if (image = doc % "section.imagesPreview img")
145
+ image_url = image["src"]
146
+ end
147
+
148
+ [book, image_url]
149
+ end
150
+ rescue StandardError => ex
151
+ trace = ex.backtrace.join("\n> ")
152
+ log.warn do
153
+ "Failed parsing search results for Thalia " \
154
+ "#{ex.message} #{trace}"
155
+ end
156
+ raise NoResultsError
157
+ end
158
+ end
159
+ end
160
+ end
161
+ end
@@ -4,8 +4,8 @@
4
4
  #
5
5
  # See the file README.md for authorship and licensing information.
6
6
 
7
- require 'hpricot'
8
- require 'htmlentities'
7
+ require "hpricot"
8
+ require "htmlentities"
9
9
 
10
10
  module Alexandria
11
11
  class BookProviders
@@ -15,9 +15,9 @@ module Alexandria
15
15
  @htmlentities = HTMLEntities.new
16
16
  end
17
17
 
18
- def html_to_doc(html, source_data_charset = 'ISO-8859-1')
18
+ def html_to_doc(html, source_data_charset = "ISO-8859-1")
19
19
  html.force_encoding source_data_charset
20
- utf8_html = html.encode('utf-8')
20
+ utf8_html = html.encode("utf-8")
21
21
  normalized_html = @htmlentities.decode(utf8_html)
22
22
  Hpricot(normalized_html)
23
23
  end
@@ -33,7 +33,7 @@ module Alexandria
33
33
  nil
34
34
  else
35
35
  node_text = node.children.map { |n| text_of(n) }.join
36
- node_text.strip.squeeze(' ')
36
+ node_text.strip.squeeze(" ")
37
37
  end
38
38
  end
39
39
  # node.inner_html.strip
@@ -1,24 +1,9 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # -*- ruby -*-
3
+ # This file is part of Alexandria.
4
4
  #
5
- # Copyright (C) 2009 Cathal Mc Ginley
6
- # Copyright (C) 2011, 2014 Matijs van Zuijlen
7
- #
8
- # Alexandria is free software; you can redistribute it and/or
9
- # modify it under the terms of the GNU General Public License as
10
- # published by the Free Software Foundation; either version 2 of the
11
- # License, or (at your option) any later version.
12
- #
13
- # Alexandria is distributed in the hope that it will be useful,
14
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
- # General Public License for more details.
17
- #
18
- # You should have received a copy of the GNU General Public
19
- # License along with Alexandria; see the file COPYING. If not,
20
- # write to the Free Software Foundation, Inc., 51 Franklin Street,
21
- # Fifth Floor, Boston, MA 02110-1301 USA.
5
+ # See the file README.md for authorship and licensing information.
6
+ # frozen_string_literal: true
22
7
 
23
8
  # http://en.wikipedia.org/wiki/WorldCat
24
9
  # See http://www.oclc.org/worldcat/policies/terms/
@@ -30,34 +15,27 @@
30
15
  # Updated from Palatina, to reflect changes in the worldcat website.
31
16
  # (1 Sep 2009)
32
17
 
33
- require 'cgi'
34
- require 'alexandria/net'
35
- require 'alexandria/book_providers/web'
18
+ require "cgi"
19
+ require "alexandria/net"
20
+ require "alexandria/book_providers/web"
36
21
 
37
22
  module Alexandria
38
23
  class BookProviders
39
24
  class WorldCatProvider < WebsiteBasedProvider
40
- include Alexandria::Logging
25
+ include Logging
41
26
 
42
- SITE = 'http://www.worldcat.org'
27
+ SITE = "https://www.worldcat.org"
43
28
  BASE_SEARCH_URL = "#{SITE}/search?q=%s%s&qt=advanced" # type, term
44
29
 
45
30
  def initialize
46
- super('WorldCat', 'WorldCat')
47
- # prefs.add("enabled", _("Enabled"), true, [true,false])
31
+ super("WorldCat", "WorldCat")
48
32
  prefs.read
49
33
  end
50
34
 
51
35
  def search(criterion, type)
52
- # puts create_search_uri(type, criterion)
53
36
  req = create_search_uri(type, criterion)
54
- puts req if $DEBUG
55
37
  html_data = transport.get_response(URI.parse(req))
56
- # Note: I tried to use Alexandria::WWWAgent,
57
- # but this caused failures here (empty pages...)
58
- # find out how the requests differ
59
38
 
60
- # puts html_data.class
61
39
  if type == SEARCH_BY_ISBN
62
40
  parse_result_data(html_data.body, criterion)
63
41
  else
@@ -70,18 +48,15 @@ module Alexandria
70
48
 
71
49
  def url(book)
72
50
  create_search_uri(SEARCH_BY_ISBN, book.isbn)
73
- rescue StandardError => ex
74
- log.warn { "Cannot create url for book #{book}; #{ex.message}" }
75
- nil
76
51
  end
77
52
 
78
53
  private
79
54
 
80
55
  def create_search_uri(search_type, search_term)
81
- (search_type_code = { SEARCH_BY_ISBN => 'isbn:',
82
- SEARCH_BY_AUTHORS => 'au:',
83
- SEARCH_BY_TITLE => 'ti:',
84
- SEARCH_BY_KEYWORD => '' }[search_type]) || ''
56
+ (search_type_code = { SEARCH_BY_ISBN => "isbn:",
57
+ SEARCH_BY_AUTHORS => "au:",
58
+ SEARCH_BY_TITLE => "ti:",
59
+ SEARCH_BY_KEYWORD => "" }[search_type]) || ""
85
60
  search_type_code = CGI.escape(search_type_code)
86
61
  search_term_encoded = if search_type == SEARCH_BY_ISBN
87
62
  Library.canonicalise_ean(search_term) # isbn-13
@@ -98,19 +73,18 @@ module Alexandria
98
73
  end
99
74
 
100
75
  def parse_search_result_data(html)
101
- doc = html_to_doc(html, 'UTF-8')
76
+ doc = html_to_doc(html, "UTF-8")
102
77
  book_search_results = []
103
78
  begin
104
- result_cells = doc / 'td.result/div.name/..'
105
- # puts result_cells.length
79
+ result_cells = doc / "td.result/div.name/.."
106
80
  result_cells.each do |td|
107
- type_icon = (td % 'div.type/img.icn')
108
- next unless type_icon && type_icon['src'] =~ /icon-bks/
81
+ type_icon = (td % "div.type/img.icn")
82
+ next unless type_icon && type_icon["src"].include?("icon-bks")
109
83
 
110
- name_div = td % 'div.name'
84
+ name_div = td % "div.name"
111
85
  title = name_div.inner_text
112
86
  anchor = name_div % :a
113
- url = anchor['href'] if anchor
87
+ url = anchor["href"] if anchor
114
88
  lookup_url = "#{SITE}#{url}"
115
89
  result = {}
116
90
  result[:title] = title
@@ -120,29 +94,29 @@ module Alexandria
120
94
  end
121
95
  rescue StandardError => ex
122
96
  trace = ex.backtrace.join("\n> ")
123
- log.warn {
124
- 'Failed parsing search results for WorldCat ' \
97
+ log.warn do
98
+ "Failed parsing search results for WorldCat " \
125
99
  "#{ex.message} #{trace}"
126
- }
100
+ end
127
101
  end
128
102
  book_search_results
129
103
  end
130
104
 
131
105
  def parse_result_data(html, search_isbn = nil, recursing = false)
132
- doc = html_to_doc(html, 'UTF-8')
106
+ doc = html_to_doc(html, "UTF-8")
133
107
 
134
108
  begin
135
- if doc % 'div#div-results-none'
136
- log.debug { 'WorldCat reports no results' }
109
+ if doc % "div#div-results-none"
110
+ log.debug { "WorldCat reports no results" }
137
111
  raise NoResultsError
138
112
  end
139
113
 
140
- if doc % 'table.table-results'
114
+ if doc % "table.table-results"
141
115
  if recursing
142
- log.warn { 'Infinite loop prevented redirecting through WorldCat' }
116
+ log.warn { "Infinite loop prevented redirecting through WorldCat" }
143
117
  raise NoResultsError
144
118
  end
145
- log.info { 'Found multiple results for lookup: checking each' }
119
+ log.info { "Found multiple results for lookup: checking each" }
146
120
  search_results = parse_search_result_data(html)
147
121
  book = nil
148
122
  cover_url = nil
@@ -154,40 +128,39 @@ module Alexandria
154
128
  html2 = rslt2.body
155
129
 
156
130
  book, cover_url = parse_result_data(html2, search_isbn, true)
157
- first_result = [book, cover_url] if first_result.nil?
158
131
 
159
132
  log.debug { "got book #{book}" }
160
133
 
161
- if search_isbn
162
- search_isbn_canon = Library.canonicalise_ean(search_isbn)
163
- rslt_isbn_canon = Library.canonicalise_ean(book.isbn)
164
- if search_isbn_canon == rslt_isbn_canon
165
- log.info { "book #{book} is a match" }
166
- return [book, cover_url]
167
- end
168
- log.debug { 'not a match, checking next' }
169
- else
170
- # no constraint to match isbn, just return first result
134
+ return [book, cover_url] unless search_isbn
135
+
136
+ first_result = [book, cover_url] if first_result.nil?
137
+
138
+ search_isbn_canon = Library.canonicalise_ean(search_isbn)
139
+ rslt_isbn_canon = Library.canonicalise_ean(book.isbn)
140
+ if search_isbn_canon == rslt_isbn_canon
141
+ log.info { "book #{book} is a match" }
171
142
  return [book, cover_url]
172
143
  end
144
+ log.debug { "not a match, checking next" }
173
145
  end
174
146
 
175
147
  # gone through all and no ISBN match, so just return first result
176
- log.info { 'no more results to check. Returning first result, just an approximation' }
148
+ log.info do
149
+ "no more results to check. Returning first result, just an approximation"
150
+ end
177
151
  return first_result
178
-
179
152
  end
180
153
 
181
- title_header = doc % 'h1.title'
154
+ title_header = doc % "h1.title"
182
155
  title = title_header.inner_text if title_header
183
156
  unless title
184
- log.warn { 'Unexpected lack of title from WorldCat lookup' }
157
+ log.warn { "Unexpected lack of title from WorldCat lookup" }
185
158
  raise NoResultsError
186
159
  end
187
160
  log.info { "Found book #{title} at WorldCat" }
188
161
 
189
162
  authors = []
190
- authors_tr = doc % 'tr#details-allauthors'
163
+ authors_tr = doc % "tr#details-allauthors"
191
164
  if authors_tr
192
165
  (authors_tr / :a).each do |a|
193
166
  authors << a.inner_text
@@ -195,17 +168,17 @@ module Alexandria
195
168
  end
196
169
 
197
170
  # can we do better? get the City name?? or multiple publishers?
198
- bibdata = doc % 'div#bibdata'
171
+ bibdata = doc % "div#bibdata"
199
172
  bibdata_table = bibdata % :table
200
- publisher_row = bibdata_table % 'th[text()*=Publisher]/..'
173
+ publisher_row = bibdata_table % "th[text()*=Publisher]/.."
201
174
 
202
175
  if publisher_row
203
- publication_info = (publisher_row / 'td').last.inner_text
176
+ publication_info = (publisher_row / "td").last.inner_text
204
177
 
205
- publication_info =~ if publication_info.index(';')
206
- /;[\s]*([^\d]+)[\s]*[\d]*/
207
- elsif publication_info.index(':')
208
- /:[\s]*([^;:,]+)/
178
+ publication_info =~ if publication_info.index(";")
179
+ /;\s*([^\d]+)\s*\d*/
180
+ elsif publication_info.index(":")
181
+ /:\s*([^;:,]+)/
209
182
  else
210
183
  /([^;,]+)/
211
184
  end
@@ -218,35 +191,33 @@ module Alexandria
218
191
  year = nil
219
192
  end
220
193
 
221
- isbn = search_isbn
222
- unless isbn
223
- isbn_row = doc % 'tr#details-standardno' # #bibdata_table % 'th[text()*=ISBN]/..'
224
- if isbn_row
225
- isbns = (isbn_row / 'td').last.inner_text.split
226
- isbn = Library.canonicalise_isbn(isbns.first)
227
- else
228
- log.warn { 'No ISBN found on page' }
229
- end
194
+ isbn_row = doc % "tr#details-standardno"
195
+ if isbn_row
196
+ isbns = (isbn_row / "td").last.inner_text.split
197
+ isbn = Library.canonicalise_isbn(isbns.first)
198
+ else
199
+ log.warn { "No ISBN found on page" }
200
+ isbn = search_isbn
230
201
  end
231
202
 
232
- binding = '' # not given on WorldCat website (as far as I can tell)
203
+ book_binding = "" # not given on WorldCat website (as far as I can tell)
233
204
 
234
- book = Book.new(title, authors, isbn, publisher, year, binding)
205
+ book = Book.new(title, authors, isbn, publisher, year, book_binding)
235
206
 
236
207
  image_url = nil # hm, it's on the website, but uses JavaScript...
237
208
 
238
- return [book, image_url]
209
+ [book, image_url]
239
210
  rescue StandardError => ex
240
211
  raise ex if ex.instance_of? NoResultsError
241
212
 
242
213
  trace = ex.backtrace.join("\n> ")
243
- log.warn {
244
- 'Failed parsing search results for WorldCat ' \
214
+ log.warn do
215
+ "Failed parsing search results for WorldCat " \
245
216
  "#{ex.message} #{trace}"
246
- }
217
+ end
247
218
  raise NoResultsError
248
219
  end
249
220
  end
250
- end # class WorldCatProvider
251
- end # class BookProviders
252
- end # module Alexandria
221
+ end
222
+ end
223
+ end