alexandria-book-collection-manager 0.7.5 → 0.7.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (173) hide show
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +9 -0
  3. data/.github/workflows/ruby.yml +72 -0
  4. data/.gitignore +4 -1
  5. data/.rubocop.yml +65 -30
  6. data/.rubocop_todo.yml +49 -165
  7. data/.simplecov +5 -2
  8. data/CHANGELOG.md +64 -0
  9. data/ChangeLog.0 +19 -19
  10. data/INSTALL.md +26 -16
  11. data/README.md +31 -35
  12. data/Rakefile +18 -16
  13. data/alexandria-book-collection-manager.gemspec +35 -29
  14. data/doc/FAQ +2 -2
  15. data/doc/dependency_decisions.yml +22 -3
  16. data/lib/alexandria/about.rb +1 -1
  17. data/lib/alexandria/book_providers/bl_provider.rb +88 -0
  18. data/lib/alexandria/book_providers/douban.rb +2 -2
  19. data/lib/alexandria/book_providers/loc_provider.rb +38 -0
  20. data/lib/alexandria/book_providers/pseudomarc.rb +61 -71
  21. data/lib/alexandria/book_providers/sbn_provider.rb +108 -0
  22. data/lib/alexandria/book_providers/{thalia.rb → thalia_provider.rb} +37 -74
  23. data/lib/alexandria/book_providers/web.rb +2 -2
  24. data/lib/alexandria/book_providers/worldcat.rb +34 -38
  25. data/lib/alexandria/book_providers/z3950_provider.rb +199 -0
  26. data/lib/alexandria/book_providers.rb +48 -65
  27. data/lib/alexandria/default_preferences.rb +2 -1
  28. data/lib/alexandria/execution_queue.rb +13 -12
  29. data/lib/alexandria/export_library.rb +21 -22
  30. data/lib/alexandria/image_fetcher.rb +25 -0
  31. data/lib/alexandria/import_library.rb +46 -70
  32. data/lib/alexandria/import_library_csv.rb +16 -16
  33. data/lib/alexandria/library_sort_order.rb +3 -1
  34. data/lib/alexandria/library_store.rb +19 -20
  35. data/lib/alexandria/logging.rb +5 -9
  36. data/lib/alexandria/models/book.rb +15 -2
  37. data/lib/alexandria/models/library.rb +31 -35
  38. data/lib/alexandria/net.rb +1 -2
  39. data/lib/alexandria/preferences.rb +27 -33
  40. data/lib/alexandria/scanners/cue_cat.rb +6 -6
  41. data/lib/alexandria/scanners/keyboard.rb +1 -1
  42. data/lib/alexandria/scanners.rb +2 -2
  43. data/lib/alexandria/smart_library.rb +22 -26
  44. data/lib/alexandria/ui/about_dialog.rb +1 -1
  45. data/lib/alexandria/ui/acquire_dialog.rb +15 -19
  46. data/lib/alexandria/ui/alert_dialog.rb +36 -19
  47. data/lib/alexandria/ui/bad_isbns_dialog.rb +13 -9
  48. data/lib/alexandria/ui/barcode_animation.rb +6 -6
  49. data/lib/alexandria/ui/book_properties_dialog.rb +2 -3
  50. data/lib/alexandria/ui/book_properties_dialog_base.rb +35 -137
  51. data/lib/alexandria/ui/calendar_popup.rb +58 -0
  52. data/lib/alexandria/ui/callbacks.rb +144 -123
  53. data/lib/alexandria/ui/completion_models.rb +2 -6
  54. data/lib/alexandria/ui/confirm_erase_dialog.rb +1 -1
  55. data/lib/alexandria/ui/conflict_while_copying_dialog.rb +2 -2
  56. data/lib/alexandria/ui/error_dialog.rb +1 -1
  57. data/lib/alexandria/ui/export_dialog.rb +19 -18
  58. data/lib/alexandria/ui/icons.rb +34 -40
  59. data/lib/alexandria/ui/iconview_tooltips.rb +40 -53
  60. data/lib/alexandria/ui/import_dialog.rb +49 -48
  61. data/lib/alexandria/ui/init.rb +14 -12
  62. data/lib/alexandria/ui/keep_bad_isbn_dialog.rb +2 -2
  63. data/lib/alexandria/ui/libraries_combo.rb +10 -9
  64. data/lib/alexandria/ui/listview.rb +6 -7
  65. data/lib/alexandria/ui/main_app.rb +2 -2
  66. data/lib/alexandria/ui/multi_drag_treeview.rb +5 -7
  67. data/lib/alexandria/ui/new_book_dialog.rb +63 -65
  68. data/lib/alexandria/ui/new_book_dialog_manual.rb +1 -1
  69. data/lib/alexandria/ui/new_provider_dialog.rb +12 -11
  70. data/lib/alexandria/ui/new_smart_library_dialog.rb +39 -27
  71. data/lib/alexandria/ui/preferences_dialog.rb +25 -84
  72. data/lib/alexandria/ui/provider_preferences_base_dialog.rb +10 -6
  73. data/lib/alexandria/ui/provider_preferences_dialog.rb +5 -5
  74. data/lib/alexandria/ui/really_delete_dialog.rb +2 -2
  75. data/lib/alexandria/ui/sidepane_manager.rb +38 -38
  76. data/lib/alexandria/ui/skip_entry_dialog.rb +3 -2
  77. data/lib/alexandria/ui/smart_library_properties_dialog.rb +35 -36
  78. data/lib/alexandria/ui/smart_library_properties_dialog_base.rb +61 -244
  79. data/lib/alexandria/ui/smart_library_rule_box.rb +119 -0
  80. data/lib/alexandria/ui/sound.rb +4 -6
  81. data/lib/alexandria/ui/ui_manager.rb +80 -83
  82. data/lib/alexandria/ui.rb +7 -7
  83. data/lib/alexandria/version.rb +2 -2
  84. data/lib/alexandria/web_themes.rb +15 -15
  85. data/lib/alexandria.rb +2 -2
  86. data/po/cs.po +947 -865
  87. data/po/cy.po +913 -864
  88. data/po/de.po +961 -865
  89. data/po/el.po +956 -861
  90. data/po/es.po +952 -857
  91. data/po/fr.po +950 -865
  92. data/po/ga.po +866 -819
  93. data/po/gl.po +946 -861
  94. data/po/it.po +945 -858
  95. data/po/ja.po +921 -836
  96. data/po/mk.po +953 -858
  97. data/po/nb.po +932 -847
  98. data/po/nl.po +955 -849
  99. data/po/pl.po +999 -963
  100. data/po/pt.po +946 -850
  101. data/po/pt_BR.po +944 -859
  102. data/po/ru.po +959 -868
  103. data/po/sk.po +950 -863
  104. data/po/sv.po +944 -859
  105. data/po/uk.po +925 -846
  106. data/po/zh_TW.po +926 -841
  107. data/schemas/alexandria.schemas +1 -1
  108. data/share/alexandria/glade/main_app__builder.glade +6 -21
  109. data/share/gnome/help/alexandria/C/adding-books.xml +3 -4
  110. data/share/gnome/help/alexandria/C/introduction.xml +0 -16
  111. data/share/gnome/help/alexandria/C/searching.xml +1 -4
  112. data/share/gnome/help/alexandria/C/settings.xml +0 -30
  113. data/share/gnome/help/alexandria/C/smart-libraries.xml +2 -2
  114. data/share/gnome/help/alexandria/C/working-with-libraries.xml +1 -1
  115. data/share/gnome/help/alexandria/fr/alexandria.xml +5 -160
  116. data/share/gnome/help/alexandria/ja/adding-books.xml +1 -1
  117. data/share/gnome/help/alexandria/ja/introduction.xml +0 -15
  118. data/share/gnome/help/alexandria/ja/searching.xml +3 -7
  119. data/share/gnome/help/alexandria/ja/settings.xml +0 -27
  120. data/share/gnome/help/alexandria/ja/smart-libraries.xml +1 -1
  121. data/spec/alexandria/book_providers/bl_provider_spec.rb +13 -0
  122. data/spec/alexandria/book_providers/loc_provider_spec.rb +17 -0
  123. data/spec/alexandria/book_providers/sbn_provider_spec.rb +13 -0
  124. data/spec/alexandria/book_providers/thalia_provider_spec.rb +119 -0
  125. data/spec/alexandria/book_providers/world_cat_provider_spec.rb +160 -0
  126. data/spec/alexandria/book_providers_spec.rb +0 -154
  127. data/spec/alexandria/console_spec.rb +0 -5
  128. data/spec/alexandria/export_library_spec.rb +27 -38
  129. data/spec/alexandria/library_spec.rb +76 -46
  130. data/spec/alexandria/preferences_spec.rb +29 -3
  131. data/spec/alexandria/scanners/cue_cat_spec.rb +1 -1
  132. data/spec/alexandria/ui/about_dialog_spec.rb +1 -1
  133. data/spec/alexandria/ui/acquire_dialog_spec.rb +1 -1
  134. data/spec/alexandria/ui/alert_dialog_spec.rb +1 -1
  135. data/spec/alexandria/ui/bad_isbns_dialog_spec.rb +1 -1
  136. data/spec/alexandria/ui/book_properties_dialog_spec.rb +47 -5
  137. data/spec/alexandria/ui/confirm_erase_dialog_spec.rb +1 -1
  138. data/spec/alexandria/ui/conflict_while_copying_dialog_spec.rb +1 -1
  139. data/spec/alexandria/ui/error_dialog_spec.rb +1 -1
  140. data/spec/alexandria/ui/export_dialog_spec.rb +25 -4
  141. data/spec/alexandria/ui/icons_spec.rb +26 -0
  142. data/spec/alexandria/ui/iconview_spec.rb +1 -1
  143. data/spec/alexandria/ui/import_dialog_spec.rb +35 -3
  144. data/spec/alexandria/ui/keep_bad_isbn_dialog_spec.rb +1 -1
  145. data/spec/alexandria/ui/main_app_spec.rb +1 -1
  146. data/spec/alexandria/ui/new_book_dialog_manual_spec.rb +39 -3
  147. data/spec/alexandria/ui/new_provider_dialog_spec.rb +19 -3
  148. data/spec/alexandria/ui/new_smart_library_dialog_spec.rb +28 -3
  149. data/spec/alexandria/ui/preferences_dialog_spec.rb +2 -2
  150. data/spec/alexandria/ui/provider_preferences_dialog_spec.rb +23 -8
  151. data/spec/alexandria/ui/really_delete_dialog_spec.rb +1 -1
  152. data/spec/alexandria/ui/sidepane_manager_spec.rb +2 -2
  153. data/spec/alexandria/ui/skip_entry_dialog_spec.rb +1 -1
  154. data/spec/alexandria/ui/smart_library_properties_dialog_spec.rb +37 -6
  155. data/spec/alexandria/ui/ui_manager_spec.rb +116 -2
  156. data/spec/data/libraries/0.6.2/My Library/9780571147168.yaml +2 -0
  157. data/spec/end_to_end/basic_run_spec.rb +3 -8
  158. data/spec/fixtures/cover.jpg +0 -0
  159. data/spec/spec_helper.rb +47 -3
  160. data/tasks/spec.rake +3 -5
  161. data/util/rake/fileinstall.rb +16 -15
  162. data/util/rake/omfgenerate.rb +1 -1
  163. metadata +141 -52
  164. data/.travis.yml +0 -39
  165. data/lib/alexandria/book_providers/adlibris.rb +0 -196
  166. data/lib/alexandria/book_providers/amazon_aws.rb +0 -252
  167. data/lib/alexandria/book_providers/amazon_ecs_util.rb +0 -388
  168. data/lib/alexandria/book_providers/barnes_and_noble.rb +0 -209
  169. data/lib/alexandria/book_providers/proxis.rb +0 -175
  170. data/lib/alexandria/book_providers/siciliano.rb +0 -257
  171. data/lib/alexandria/book_providers/z3950.rb +0 -415
  172. data/spec/alexandria/ui/ui_utilities_spec.rb +0 -62
  173. data/spec/alexandria/utilities_spec.rb +0 -52
@@ -41,9 +41,9 @@ module Alexandria
41
41
  notes: ["520", "a"]
42
42
  }.freeze
43
43
 
44
- def self.get_fields(data, type, stripping, m = USMARC_MAPPINGS)
44
+ def self.get_fields(data, type, stripping, mappings = USMARC_MAPPINGS)
45
45
  field = ""
46
- m[type][1..m[type].length - 1].each do |part|
46
+ mappings[type][1..mappings[type].length - 1].each do |part|
47
47
  if data.first[part]
48
48
  part_data = data.first[part].strip
49
49
  if part_data =~ stripping
@@ -58,69 +58,68 @@ module Alexandria
58
58
  field
59
59
  end
60
60
 
61
- def self.marc_text_to_book(marc, m = USMARC_MAPPINGS)
61
+ def self.marc_text_to_book(marc, mappings = USMARC_MAPPINGS)
62
62
  details = marc_text_to_details(marc)
63
- unless details.empty?
64
- title = nil
65
- title_data = details[m[:title][0]]
66
- if title_data
67
- title_data_all = get_fields(title_data, :title, %r{(.*)[/:]$}, m)
68
- title = title_data_all if title_data_all
69
- end
63
+ return if details.empty?
70
64
 
71
- authors = []
72
- author_data = details[m[:authors][0]]
73
- author_data&.each do |ad|
74
- author = ad[m[:authors][1]]
75
- if author
76
- author = author.strip
77
- author = Regexp.last_match[1] if author =~ /(.*),$/
78
- authors << author
79
- end
80
- end
65
+ title = nil
66
+ title_data = details[mappings[:title][0]]
67
+ if title_data
68
+ title_data_all = get_fields(title_data, :title, %r{(.*)[/:]$}, mappings)
69
+ title = title_data_all if title_data_all
70
+ end
81
71
 
82
- isbn = nil
83
- binding = nil
84
- isbn_data = details[m[:isbn][0]]
85
- if isbn_data
86
- isbn = Regexp.last_match[1] if isbn_data.first[m[:isbn][1]] =~ /([-0-9xX]+)/
72
+ authors = []
73
+ author_data = details[mappings[:authors][0]]
74
+ author_data&.each do |ad|
75
+ author = ad[mappings[:authors][1]]
76
+ if author
77
+ author = author.strip
78
+ author = Regexp.last_match[1] if author =~ /(.*),$/
79
+ authors << author
87
80
  end
81
+ end
88
82
 
89
- binding_data = details[m[:binding][0]]
90
- if binding_data
91
- if binding_data.first[m[:binding][1]] =~ /([a-zA-Z][a-z\s]+[a-z])/
92
- binding = Regexp.last_match[1]
93
- end
94
- end
83
+ isbn = nil
84
+ binding = nil
85
+ isbn_data = details[mappings[:isbn][0]]
86
+ if isbn_data && isbn_data.first[mappings[:isbn][1]] =~ /([-0-9xX]+)/
87
+ isbn = Regexp.last_match[1]
88
+ end
95
89
 
96
- publisher = nil
97
- publisher_data = details[m[:publisher][0]]
98
- publisher = publisher_data.first[m[:publisher][1]] if publisher_data
90
+ binding_data = details[mappings[:binding][0]]
91
+ if binding_data &&
92
+ binding_data.first[mappings[:binding][1]] =~ /([a-zA-Z][a-z\s]+[a-z])/
93
+ binding = Regexp.last_match[1]
94
+ end
99
95
 
100
- year = nil
101
- publication_data = details[m[:year][0]]
102
- if publication_data
103
- year = publication_data.first[m[:year][1]]
104
- year = Regexp.last_match[1].to_i if year =~ /(\d+)/
105
- end
96
+ publisher = nil
97
+ publisher_data = details[mappings[:publisher][0]]
98
+ publisher = publisher_data.first[mappings[:publisher][1]] if publisher_data
106
99
 
107
- notes = ""
108
- notes_data = details[m[:notes][0]]
109
- notes_data&.each do |note|
110
- txt = note[m[:notes][1]]
111
- notes += txt if txt
112
- end
100
+ year = nil
101
+ publication_data = details[mappings[:year][0]]
102
+ if publication_data
103
+ year = publication_data.first[mappings[:year][1]]
104
+ year = Regexp.last_match[1].to_i if year =~ /(\d+)/
105
+ end
113
106
 
114
- if title.nil? && isbn.nil?
115
- # probably didn't understand the MARC dialect
116
- return nil
117
- end
107
+ notes = ""
108
+ notes_data = details[mappings[:notes][0]]
109
+ notes_data&.each do |note|
110
+ txt = note[mappings[:notes][1]]
111
+ notes += txt if txt
112
+ end
118
113
 
119
- book = Alexandria::Book.new(title, authors, isbn,
120
- publisher, year, binding)
121
- book.notes = notes unless notes.empty?
122
- book
114
+ if title.nil? && isbn.nil?
115
+ # probably didn't understand the MARC dialect
116
+ return nil
123
117
  end
118
+
119
+ book = Alexandria::Book.new(title, authors, isbn,
120
+ publisher, year, binding)
121
+ book.notes = notes unless notes.empty?
122
+ book
124
123
  end
125
124
 
126
125
  def self.marc_text_to_details(marc)
@@ -132,31 +131,22 @@ module Alexandria
132
131
 
133
132
  this_line_data = {}
134
133
 
135
- # puts code
136
- # puts data
137
134
  d_idx = 0
138
135
  while d_idx < data.size
139
- d_str = data[d_idx..-1]
140
- # puts d_str
141
- if (idx = d_str =~ /\$([a-z]) ([^\$]+)/)
142
- # puts idx
143
- sub_code = Regexp.last_match[1]
144
- sub_data = Regexp.last_match[2]
145
- this_line_data[sub_code] = sub_data
146
- # puts " " + $1
147
- # puts " " + $2
148
- # puts idx
149
- d_idx += idx + 2 # (2 extra to push beyond this '$a' etc.)
150
- else
151
- break
152
- end
136
+ d_str = data[d_idx..]
137
+ idx = d_str =~ /\$([a-z]) ([^$]+)/
138
+ break unless idx
139
+
140
+ sub_code = Regexp.last_match[1]
141
+ sub_data = Regexp.last_match[2]
142
+ this_line_data[sub_code] = sub_data
143
+ d_idx += idx + 2 # (2 extra to push beyond this '$a' etc.)
153
144
  end
154
145
 
155
146
  unless this_line_data.empty?
156
147
  details[code] = [] unless details.key?(code)
157
148
  details[code] << this_line_data
158
149
  end
159
-
160
150
  end
161
151
  end
162
152
  details
@@ -0,0 +1,108 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This file is part of Alexandria.
4
+ #
5
+ # See the file README.md for authorship and licensing information.
6
+
7
+ require "alexandria/book_providers/z3950_provider"
8
+
9
+ module Alexandria
10
+ class BookProviders
11
+ class SBNProvider < Z3950Provider
12
+ # http://sbnonline.sbn.it/
13
+ # http://it.wikipedia.org/wiki/ICCU
14
+ unabstract
15
+
16
+ include GetText
17
+ GetText.bindtextdomain(Alexandria::TEXTDOMAIN, charset: "UTF-8")
18
+
19
+ def initialize
20
+ super("SBN", "Servizio Bibliotecario Nazionale (Italy)")
21
+ prefs.variable_named("hostname").default_value = "opac.sbn.it"
22
+ prefs.variable_named("port").default_value = 3950
23
+ prefs.variable_named("database").default_value = "nopac"
24
+ # supported 'USMARC', 'UNIMARC' , 'SUTRS'
25
+ prefs.variable_named("record_syntax").default_value = "USMARC"
26
+ prefs.variable_named("charset").default_value = "ISO-8859-1"
27
+ prefs.read
28
+ end
29
+
30
+ def url(book)
31
+ "http://sbnonline.sbn.it/cgi-bin/zgw/BRIEF.pl?displayquery=" \
32
+ "%253CB%253E%253Cfont%2520color%253D%2523000064%253E" \
33
+ "Codice%2520ISBN%253C%2FB%253E%253C%2Ffont%253E%2520" \
34
+ "contiene%2520%2522%2520%253CFONT%2520COLOR%253Dred%253E" +
35
+ canonicalise_isbn_with_dashes(book.isbn) +
36
+ "%253C%2FFONT%253E%2522&session=&zurl=opac" \
37
+ "&zquery=%281%3D7+4%3D2+2%3D3+5%3D100+6%3D1+3%3D3+%22" +
38
+ canonicalise_isbn_with_dashes(book.isbn) +
39
+ "%22%29&language=it&maxentries=10&target=0&position=1"
40
+ rescue StandardError => ex
41
+ log.warn { "Cannot create url for book #{book}; #{ex.message}" }
42
+ nil
43
+ end
44
+
45
+ private
46
+
47
+ def canonicalise_criterion(criterion, _type)
48
+ canonicalise_isbn_with_dashes(criterion)
49
+ end
50
+
51
+ def request_count(_type)
52
+ 0
53
+ end
54
+
55
+ def canonicalise_isbn_with_dashes(isbn)
56
+ # The reference for the position of the dashes is
57
+ # http://www.isbn-international.org/converter/ranges.htm
58
+
59
+ isbn = Alexandria::Library.canonicalise_isbn(isbn)
60
+
61
+ if isbn[0..1] == "88"
62
+ # Italian speaking area
63
+ if isbn > "8895000" && (isbn <= "8899999996")
64
+ isbn[0..1] + "-" + isbn[2..6] + "-" + isbn[7..8] + "-" + isbn[9..9]
65
+ elsif isbn > "88900000"
66
+ isbn[0..1] + "-" + isbn[2..7] + "-" + isbn[8..8] + "-" + isbn[9..9]
67
+ elsif isbn > "8885000"
68
+ isbn[0..1] + "-" + isbn[2..6] + "-" + isbn[7..8] + "-" + isbn[9..9]
69
+ elsif isbn > "886000"
70
+ isbn[0..1] + "-" + isbn[2..5] + "-" + isbn[6..8] + "-" + isbn[9..9]
71
+ elsif isbn > "88200"
72
+ isbn[0..1] + "-" + isbn[2..4] + "-" + isbn[5..8] + "-" + isbn[9..9]
73
+ elsif isbn > "8800"
74
+ isbn[0..1] + "-" + isbn[2..3] + "-" + isbn[4..8] + "-" + isbn[9..9]
75
+ else
76
+ raise _("Invalid ISBN")
77
+ end
78
+
79
+ else
80
+ isbn
81
+ end
82
+ end
83
+ #
84
+ # Remarks about SBN
85
+ #
86
+ # This provider requires that value of conn.count is 0.
87
+ # It's a Yaz option "Number of records to be retrieved".
88
+ # This provider requires to specify the value of conn.element_set_name = 'F'.
89
+ # It's a Yaz option "Element-Set name of records".
90
+ # See http://www.indexdata.dk/yaz/doc/zoom.resultsets.tkl
91
+ #
92
+ # Dashes:
93
+ # this database requires that Italian books are searched with dashes :(
94
+ # However, they have also books with dashes in wrong positions, for
95
+ # instance 88-061-4934-2
96
+ #
97
+ # References:
98
+ # http://opac.internetculturale.it/cgi-bin/main.cgi?type=field
99
+ # http://www.internetculturale.it/
100
+ # http://sbnonline.sbn.it/zgw/homeit.html
101
+ # http://www.iccu.sbn.it/genera.jsp?id=124
102
+ # with link at http://www.iccu.sbn.it/upload/documenti/cartecsbn.pdf
103
+ # http://www.loc.gov/cgi-bin/zgstart?ACTION=INIT&FORM_HOST_PORT=/prod/www/data/z3950/iccu.html,opac.sbn.it,2100
104
+ # http://gwz.cilea.it/cgi-bin/reportOpac.cgi
105
+ #
106
+ end
107
+ end
108
+ end
@@ -1,27 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- # Copyright (C) 2009 Cathal Mc Ginley
4
- # Copyright (C) 2014 Matijs van Zuijlen
3
+ # This file is part of Alexandria.
5
4
  #
6
- # Alexandria is free software; you can redistribute it and/or
7
- # modify it under the terms of the GNU General Public License as
8
- # published by the Free Software Foundation; either version 2 of the
9
- # License, or (at your option) any later version.
10
- #
11
- # Alexandria is distributed in the hope that it will be useful,
12
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
- # General Public License for more details.
15
- #
16
- # You should have received a copy of the GNU General Public
17
- # License along with Alexandria; see the file COPYING. If not,
18
- # write to the Free Software Foundation, Inc., 51 Franklin Street,
19
- # Fifth Floor, Boston, MA 02110-1301 USA.
5
+ # See the file README.md for authorship and licensing information.
20
6
 
21
7
  # http://de.wikipedia.org/wiki/Thalia_%28Buchhandel%29
22
8
  # Thalia.de bought the Austrian book trade chain Amadeus
23
9
 
24
- # New Tlalia provider, taken from Palatina MetaDataSource and modified
10
+ # New Thalia provider, taken from Palatina MetaDataSource and modified
25
11
  # for Alexandria. (21 Dec 2009)
26
12
 
27
13
  require "net/http"
@@ -31,9 +17,9 @@ require "alexandria/book_providers/web"
31
17
  module Alexandria
32
18
  class BookProviders
33
19
  class ThaliaProvider < WebsiteBasedProvider
34
- include Alexandria::Logging
20
+ include Logging
35
21
 
36
- SITE = "http://www.thalia.de"
22
+ SITE = "https://www.thalia.de"
37
23
  BASE_SEARCH_URL = "#{SITE}/shop/bde_bu_hg_startseite/suche/?%s=%s" # type,term
38
24
 
39
25
  def initialize
@@ -48,7 +34,7 @@ module Alexandria
48
34
 
49
35
  def search(criterion, type)
50
36
  req = create_search_uri(type, criterion)
51
- puts req if $DEBUG
37
+ log.debug { req }
52
38
  html_data = transport.get_response(URI.parse(req))
53
39
  if type == SEARCH_BY_ISBN
54
40
  parse_result_data(html_data.body, criterion)
@@ -80,40 +66,36 @@ module Alexandria
80
66
  def parse_search_result_data(html)
81
67
  doc = html_to_doc(html)
82
68
  book_search_results = []
83
- results_divs = doc / "div.articlePresentationSearchCH"
84
- results_divs.each do |div|
69
+
70
+ results_items = doc / "ul.weitere-formate li.format"
71
+
72
+ results_items.each do |item|
85
73
  result = {}
86
- title_link = div % "div.articleText/h2/a"
87
- result[:title] = title_link.inner_html
88
- result[:lookup_url] = title_link["href"]
74
+ item_link = item % "a"
75
+ result[:lookup_url] = "#{SITE}#{item_link['href']}"
89
76
  book_search_results << result
90
77
  end
91
78
  book_search_results
92
79
  end
93
80
 
94
81
  def data_from_label(node, label_text)
95
- label_node = node % "strong[text()*='#{label_text}']"
96
- if (item_node = label_node.parent)
97
- data = ""
98
- item_node.children.each do |n|
99
- data += n.to_html if n.text?
100
- end
101
- data.strip
102
- else
103
- ""
104
- end
82
+ label_node = node % "th[text()*='#{label_text}']"
83
+ return "" unless label_node
84
+
85
+ item_node = label_node.parent % "td"
86
+ item_node.inner_text.strip
105
87
  end
106
88
 
107
89
  def get_book_from_search_result(result)
108
90
  log.debug { "Fetching book from #{result[:lookup_url]}" }
109
91
  html_data = transport.get_response(URI.parse(result[:lookup_url]))
110
- parse_result_data(html_data.body, "noisbn", true)
92
+ parse_result_data(html_data.body, "noisbn", recursing: true)
111
93
  end
112
94
 
113
- def parse_result_data(html, isbn, recursing = false)
95
+ def parse_result_data(html, isbn, recursing: false)
114
96
  doc = html_to_doc(html)
115
97
 
116
- results_divs = doc / "div.articlePresentationSearchCH"
98
+ results_divs = doc / "ul.weitere-formate"
117
99
  unless results_divs.empty?
118
100
  if recursing
119
101
  # already recursing, avoid doing so endlessly second time
@@ -122,73 +104,54 @@ module Alexandria
122
104
  return
123
105
  end
124
106
 
125
- # ISBN-lookup results in multiple results (trying to be
126
- # useful, such as for new editions e.g. 9780974514055
127
- # "Programming Ruby" )
107
+ # ISBN-lookup results in multiple results
128
108
  results = parse_search_result_data(html)
129
- isbn10 = Library.canonicalise_isbn(isbn)
130
- # e.g. .../dave_thomas/ISBN0-9745140-5-5/ID6017044.html
131
109
  chosen = results.first # fallback!
132
- results.each do |rslt|
133
- if rslt[:lookup_url] =~ %r{/ISBN(\d+[\d-]*)/}
134
- if Regexp.last_match[1].delete("-") == isbn10
135
- chosen = rslt
136
- break
137
- end
138
- end
139
- end
140
110
  html_data = transport.get_response(URI.parse(chosen[:lookup_url]))
141
- return parse_result_data(html_data.body, isbn, true)
111
+ return parse_result_data(html_data.body, isbn, recursing: true)
142
112
  end
143
113
 
144
114
  begin
145
- if (div = doc % "div#contentFull")
146
- title_img = ((div % :h2) / :img).first
147
- title = title_img["alt"]
148
-
149
- # note, the following img also has alt="von Author, Author..."
115
+ if (div = doc % "section#sbe-product-details")
116
+ title = div["data-titel"]
150
117
 
151
- if (author_h = doc % 'h3[text()*="Mehr von"]') # "More from..." links
118
+ if (author_p = doc % "p.aim-author")
152
119
  authors = []
153
- author_links = author_h.parent / :a
120
+ author_links = author_p / :a
154
121
  author_links.each do |a|
155
- if a["href"] =~ %r{BUCH/sa}
156
- # 'sa' means search author, there may also be 'ssw' (search keyword) links
157
- authors << a.inner_text[0..-2].strip
158
- # NOTE stripping the little >> character here...
159
- end
122
+ authors << a.inner_text.strip
160
123
  end
161
124
  end
162
125
 
163
- item_details = doc % "ul.itemDataList"
126
+ item_details = doc % "section.artikeldetails"
164
127
  isbns = []
165
128
  isbns << data_from_label(item_details, "EAN")
166
129
  isbns << data_from_label(item_details, "ISBN")
130
+ isbns.reject!(&:empty?)
167
131
 
168
132
  year = nil
169
- date = data_from_label(item_details, "Erschienen:")
170
- year = Regexp.last_match[1].to_i if date =~ /([\d]{4})/
133
+ date = data_from_label(item_details, "Erscheinungsdatum")
134
+ year = Regexp.last_match[1].to_i if date =~ /(\d{4})/
171
135
 
172
- binding = data_from_label(item_details, "Einband")
136
+ book_binding = data_from_label(item_details, "Einband")
173
137
 
174
- publisher = data_from_label(item_details, "Erschienen bei:")
138
+ publisher = data_from_label(item_details, "Verlag")
175
139
 
176
140
  book = Book.new(title, authors, isbns.first,
177
- publisher, year, binding)
141
+ publisher, year, book_binding)
178
142
 
179
143
  image_url = nil
180
- if (image_link = doc % "a[@id=itemPicStart]")
181
- image_url = image_link["href"]
144
+ if (image = doc % "section.imagesPreview img")
145
+ image_url = image["src"]
182
146
  end
183
147
 
184
148
  [book, image_url]
185
-
186
149
  end
187
150
  rescue StandardError => ex
188
151
  trace = ex.backtrace.join("\n> ")
189
152
  log.warn do
190
153
  "Failed parsing search results for Thalia " \
191
- "#{ex.message} #{trace}"
154
+ "#{ex.message} #{trace}"
192
155
  end
193
156
  raise NoResultsError
194
157
  end
@@ -4,7 +4,7 @@
4
4
  #
5
5
  # See the file README.md for authorship and licensing information.
6
6
 
7
- require "hpricot"
7
+ require "nokogiri"
8
8
  require "htmlentities"
9
9
 
10
10
  module Alexandria
@@ -19,7 +19,7 @@ module Alexandria
19
19
  html.force_encoding source_data_charset
20
20
  utf8_html = html.encode("utf-8")
21
21
  normalized_html = @htmlentities.decode(utf8_html)
22
- Hpricot(normalized_html)
22
+ Nokogiri.parse(normalized_html)
23
23
  end
24
24
 
25
25
  ## from Palatina
@@ -22,7 +22,7 @@ require "alexandria/book_providers/web"
22
22
  module Alexandria
23
23
  class BookProviders
24
24
  class WorldCatProvider < WebsiteBasedProvider
25
- include Alexandria::Logging
25
+ include Logging
26
26
 
27
27
  SITE = "https://www.worldcat.org"
28
28
  BASE_SEARCH_URL = "#{SITE}/search?q=%s%s&qt=advanced" # type, term
@@ -76,11 +76,11 @@ module Alexandria
76
76
  doc = html_to_doc(html, "UTF-8")
77
77
  book_search_results = []
78
78
  begin
79
- result_cells = doc / "td.result/div.name/.."
80
- # puts result_cells.length
81
- result_cells.each do |td|
82
- type_icon = (td % "div.type/img.icn")
83
- next unless type_icon && type_icon["src"] =~ /icon-bks/
79
+ result_divs = doc / "td.result/div.name"
80
+ result_divs.each do |div|
81
+ td = div.parent
82
+ type_icon = td % "div.type/img.icn"
83
+ next unless type_icon && type_icon["src"].include?("icon-bks")
84
84
 
85
85
  name_div = td % "div.name"
86
86
  title = name_div.inner_text
@@ -97,7 +97,7 @@ module Alexandria
97
97
  trace = ex.backtrace.join("\n> ")
98
98
  log.warn do
99
99
  "Failed parsing search results for WorldCat " \
100
- "#{ex.message} #{trace}"
100
+ "#{ex.message} #{trace}"
101
101
  end
102
102
  end
103
103
  book_search_results
@@ -129,22 +129,20 @@ module Alexandria
129
129
  html2 = rslt2.body
130
130
 
131
131
  book, cover_url = parse_result_data(html2, search_isbn, true)
132
- first_result = [book, cover_url] if first_result.nil?
133
132
 
134
133
  log.debug { "got book #{book}" }
135
134
 
136
- if search_isbn
137
- search_isbn_canon = Library.canonicalise_ean(search_isbn)
138
- rslt_isbn_canon = Library.canonicalise_ean(book.isbn)
139
- if search_isbn_canon == rslt_isbn_canon
140
- log.info { "book #{book} is a match" }
141
- return [book, cover_url]
142
- end
143
- log.debug { "not a match, checking next" }
144
- else
145
- # no constraint to match isbn, just return first result
135
+ return [book, cover_url] unless search_isbn
136
+
137
+ first_result = [book, cover_url] if first_result.nil?
138
+
139
+ search_isbn_canon = Library.canonicalise_ean(search_isbn)
140
+ rslt_isbn_canon = Library.canonicalise_ean(book.isbn)
141
+ if search_isbn_canon == rslt_isbn_canon
142
+ log.info { "book #{book} is a match" }
146
143
  return [book, cover_url]
147
144
  end
145
+ log.debug { "not a match, checking next" }
148
146
  end
149
147
 
150
148
  # gone through all and no ISBN match, so just return first result
@@ -152,7 +150,6 @@ module Alexandria
152
150
  "no more results to check. Returning first result, just an approximation"
153
151
  end
154
152
  return first_result
155
-
156
153
  end
157
154
 
158
155
  title_header = doc % "h1.title"
@@ -174,15 +171,16 @@ module Alexandria
174
171
  # can we do better? get the City name?? or multiple publishers?
175
172
  bibdata = doc % "div#bibdata"
176
173
  bibdata_table = bibdata % :table
177
- publisher_row = bibdata_table % "th[text()*=Publisher]/.."
174
+ publisher_header = bibdata_table % "th[text()*=Publisher]"
178
175
 
179
- if publisher_row
176
+ if publisher_header
177
+ publisher_row = publisher_header.parent
180
178
  publication_info = (publisher_row / "td").last.inner_text
181
179
 
182
180
  publication_info =~ if publication_info.index(";")
183
- /;[\s]*([^\d]+)[\s]*[\d]*/
181
+ /;\s*([^\d]+)\s*\d*/
184
182
  elsif publication_info.index(":")
185
- /:[\s]*([^;:,]+)/
183
+ /:\s*([^;:,]+)/
186
184
  else
187
185
  /([^;,]+)/
188
186
  end
@@ -195,20 +193,18 @@ module Alexandria
195
193
  year = nil
196
194
  end
197
195
 
198
- isbn = search_isbn
199
- unless isbn
200
- isbn_row = doc % "tr#details-standardno"
201
- if isbn_row
202
- isbns = (isbn_row / "td").last.inner_text.split
203
- isbn = Library.canonicalise_isbn(isbns.first)
204
- else
205
- log.warn { "No ISBN found on page" }
206
- end
196
+ isbn_row = doc % "tr#details-standardno"
197
+ if isbn_row
198
+ isbns = (isbn_row / "td").last.inner_text.split
199
+ isbn = Library.canonicalise_isbn(isbns.first)
200
+ else
201
+ log.warn { "No ISBN found on page" }
202
+ isbn = search_isbn
207
203
  end
208
204
 
209
- binding = "" # not given on WorldCat website (as far as I can tell)
205
+ book_binding = "" # not given on WorldCat website (as far as I can tell)
210
206
 
211
- book = Book.new(title, authors, isbn, publisher, year, binding)
207
+ book = Book.new(title, authors, isbn, publisher, year, book_binding)
212
208
 
213
209
  image_url = nil # hm, it's on the website, but uses JavaScript...
214
210
 
@@ -219,11 +215,11 @@ module Alexandria
219
215
  trace = ex.backtrace.join("\n> ")
220
216
  log.warn do
221
217
  "Failed parsing search results for WorldCat " \
222
- "#{ex.message} #{trace}"
218
+ "#{ex.message} #{trace}"
223
219
  end
224
220
  raise NoResultsError
225
221
  end
226
222
  end
227
- end # class WorldCatProvider
228
- end # class BookProviders
229
- end # module Alexandria
223
+ end
224
+ end
225
+ end