mabmapper 1.0.0.pre15

Sign up to get free protection for your applications and to get access to all the features.
Files changed (221) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +20 -0
  3. data/.travis.yml +4 -0
  4. data/Gemfile +7 -0
  5. data/LICENSE +22 -0
  6. data/README.md +49 -0
  7. data/Rakefile +29 -0
  8. data/bin/mabmapper +3 -0
  9. data/lib/mabmapper/aleph_mab_xml_engine.rb +1050 -0
  10. data/lib/mabmapper/cli.rb +216 -0
  11. data/lib/mabmapper/elasticsearch_writer.rb +52 -0
  12. data/lib/mabmapper/engine.rb +112 -0
  13. data/lib/mabmapper/mab_xml/document.rb +53 -0
  14. data/lib/mabmapper/mab_xml/field.rb +43 -0
  15. data/lib/mabmapper/mab_xml/query.rb +25 -0
  16. data/lib/mabmapper/mab_xml/query_helper.rb +101 -0
  17. data/lib/mabmapper/mab_xml/result_set.rb +34 -0
  18. data/lib/mabmapper/mab_xml/subfield.rb +12 -0
  19. data/lib/mabmapper/mab_xml.rb +6 -0
  20. data/lib/mabmapper/tar_writer.rb +29 -0
  21. data/lib/mabmapper/version.rb +3 -0
  22. data/lib/mabmapper.rb +11 -0
  23. data/mabmapper.gemspec +33 -0
  24. data/test/mab_files/test_creation_date/test1.xml +17 -0
  25. data/test/mab_files/test_creation_date/test2.xml +17 -0
  26. data/test/mab_files/test_creationdate/425_a_1.xml +17 -0
  27. data/test/mab_files/test_creationdate/425_a_2.xml +19 -0
  28. data/test/mab_files/test_creationdate/425_bc_1.xml +19 -0
  29. data/test/mab_files/test_creationdate/425_bc_2.xml +22 -0
  30. data/test/mab_files/test_creationdate/425_bc_3.xml +22 -0
  31. data/test/mab_files/test_creationdate/425_bc_4.xml +19 -0
  32. data/test/mab_files/test_creationdate/425_p_1.xml +19 -0
  33. data/test/mab_files/test_creationdate/425_p_2.xml +17 -0
  34. data/test/mab_files/test_creationdate/595_1.xml +20 -0
  35. data/test/mab_files/test_creator_contributor_facet/PAD01.001006945.PRIMO.xml +574 -0
  36. data/test/mab_files/test_description/405.xml +22 -0
  37. data/test/mab_files/test_description/501-519.xml +30 -0
  38. data/test/mab_files/test_description/522.xml +22 -0
  39. data/test/mab_files/test_description/523.xml +22 -0
  40. data/test/mab_files/test_description/536-537.xml +30 -0
  41. data/test/mab_files/test_doc/PAD01.001510737.PRIMO.xml +317 -0
  42. data/test/mab_files/test_edition/PAD01.000844686.PRIMO.xml +584 -0
  43. data/test/mab_files/test_edition/PAD01.000969531.PRIMO.xml +129 -0
  44. data/test/mab_files/test_edition/PAD01.000969710.PRIMO.xml +144 -0
  45. data/test/mab_files/test_edition/PAD01.000978033.PRIMO.xml +163 -0
  46. data/test/mab_files/test_edition/PAD01.000990520.PRIMO.xml +163 -0
  47. data/test/mab_files/test_erscheinungsform/PAD01.000870753.PRIMO.xml +256 -0
  48. data/test/mab_files/test_erscheinungsform/PAD01.000870755.PRIMO.xml +467 -0
  49. data/test/mab_files/test_ht_number/PAD01.001015067.PRIMO.xml +137 -0
  50. data/test/mab_files/test_inhaltstyp/PAD01.000870753.PRIMO.xml +256 -0
  51. data/test/mab_files/test_inhaltstyp/PAD01.000870755.PRIMO.xml +467 -0
  52. data/test/mab_files/test_is_secondary_form/PAD01.000806191.PRIMO.xml +216 -0
  53. data/test/mab_files/test_is_secondary_form/PAD01.000844686.PRIMO.xml +584 -0
  54. data/test/mab_files/test_is_secondary_form/PAD01.001015067.PRIMO.xml +137 -0
  55. data/test/mab_files/test_is_secondary_form/PAD01.001452439.PRIMO.xml +377 -0
  56. data/test/mab_files/test_is_suborder/PAD01.000806191.PRIMO.xml +216 -0
  57. data/test/mab_files/test_is_suborder/PAD01.000844686.PRIMO.xml +584 -0
  58. data/test/mab_files/test_is_suborder/PAD01.001452439.PRIMO.xml +377 -0
  59. data/test/mab_files/test_issn/PAD01.000637121.PRIMO.xml +805 -0
  60. data/test/mab_files/test_materialtyp/PAD01.000870753.PRIMO.xml +256 -0
  61. data/test/mab_files/test_materialtyp/PAD01.000870755.PRIMO.xml +467 -0
  62. data/test/mab_files/test_notation_sort/PAD01.000970649.PRIMO.xml +306 -0
  63. data/test/mab_files/test_notation_sort/PAD01.001006944.PRIMO.xml +279 -0
  64. data/test/mab_files/test_publisher/PAD01.000312406.PRIMO.xml +1043 -0
  65. data/test/mab_files/test_redactional_remark/PAD01.001510737.PRIMO.xml +317 -0
  66. data/test/mab_files/test_relation/PAD01.000438377.PRIMO.xml +232 -0
  67. data/test/mab_files/test_relation/PAD01.000637121.PRIMO.xml +810 -0
  68. data/test/mab_files/test_relation/PAD01.000806191.PRIMO.xml +216 -0
  69. data/test/mab_files/test_relation/PAD01.000844686.PRIMO.xml +584 -0
  70. data/test/mab_files/test_relation/PAD01.001452439.PRIMO.xml +377 -0
  71. data/test/mab_files/test_secondary_form_creationdate/PAD01.000806191.PRIMO.xml +216 -0
  72. data/test/mab_files/test_secondary_form_creationdate/PAD01.000844686.PRIMO.xml +584 -0
  73. data/test/mab_files/test_secondary_form_creationdate/PAD01.001452439.PRIMO.xml +377 -0
  74. data/test/mab_files/test_secondary_form_isbn/PAD01.000806191.PRIMO.xml +216 -0
  75. data/test/mab_files/test_secondary_form_isbn/PAD01.000844686.PRIMO.xml +584 -0
  76. data/test/mab_files/test_secondary_form_isbn/PAD01.001452439.PRIMO.xml +377 -0
  77. data/test/mab_files/test_secondary_form_physical_description/PAD01.000806191.PRIMO.xml +216 -0
  78. data/test/mab_files/test_secondary_form_physical_description/PAD01.001452439.PRIMO.xml +377 -0
  79. data/test/mab_files/test_secondary_form_preliminary_phrase/PAD01.000806191.PRIMO.xml +216 -0
  80. data/test/mab_files/test_secondary_form_preliminary_phrase/PAD01.001452439.PRIMO.xml +377 -0
  81. data/test/mab_files/test_secondary_form_publisher/PAD01.000806191.PRIMO.xml +216 -0
  82. data/test/mab_files/test_secondary_form_publisher/PAD01.001452439.PRIMO.xml +377 -0
  83. data/test/mab_files/test_secondary_form_superorder/PAD01.000806191.PRIMO.xml +216 -0
  84. data/test/mab_files/test_secondary_form_superorder/PAD01.000977734.PRIMO.xml +225 -0
  85. data/test/mab_files/test_secondary_form_superorder/PAD01.001452439.PRIMO.xml +377 -0
  86. data/test/mab_files/test_short_title_display/PAD01.000057960.PRIMO.xml +1069 -0
  87. data/test/mab_files/test_short_title_display/PAD01.000058000.PRIMO.xml +995 -0
  88. data/test/mab_files/test_short_title_display/PAD01.000215104.PRIMO.xml +191 -0
  89. data/test/mab_files/test_short_title_display/PAD01.000310864.PRIMO.xml +999 -0
  90. data/test/mab_files/test_short_title_display/PAD01.000392641.PRIMO.xml +4334 -0
  91. data/test/mab_files/test_short_title_display/PAD01.000392645.PRIMO.xml +4094 -0
  92. data/test/mab_files/test_short_title_display/PAD01.000438377.PRIMO.xml +232 -0
  93. data/test/mab_files/test_short_title_display/PAD01.000479391.PRIMO.xml +142 -0
  94. data/test/mab_files/test_short_title_display/PAD01.000637121.PRIMO.xml +805 -0
  95. data/test/mab_files/test_short_title_display/PAD01.000676616.PRIMO.xml +128 -0
  96. data/test/mab_files/test_short_title_display/PAD01.000782994.PRIMO.xml +169 -0
  97. data/test/mab_files/test_short_title_display/PAD01.001006945.PRIMO.xml +574 -0
  98. data/test/mab_files/test_short_title_display/PAD01.001015067.PRIMO.xml +137 -0
  99. data/test/mab_files/test_short_title_display/PAD01.001015070.PRIMO.xml +212 -0
  100. data/test/mab_files/test_short_title_display/PAD01.001108212.PRIMO.xml +259 -0
  101. data/test/mab_files/test_short_title_display/PAD01.001249043.PRIMO.xml +172 -0
  102. data/test/mab_files/test_short_title_display/PAD01.001499877.PRIMO.xml +227 -0
  103. data/test/mab_files/test_short_title_display/PAD01.001499879.PRIMO.xml +255 -0
  104. data/test/mab_files/test_short_title_display/PAD01.001499880.PRIMO.xml +279 -0
  105. data/test/mab_files/test_short_title_display/PAD01.001510878.PRIMO.xml +184 -0
  106. data/test/mab_files/test_short_title_display/PAD01.001562173.PRIMO.xml +116 -0
  107. data/test/mab_files/test_short_title_display/PAD01.001568334.PRIMO.xml +1840 -0
  108. data/test/mab_files/test_short_title_display/PAD01.001572048.PRIMO.xml +68 -0
  109. data/test/mab_files/test_short_title_display/PAD01.001572049.PRIMO.xml +133 -0
  110. data/test/mab_files/test_signature/PAD01.000161445.PRIMO.xml +149 -0
  111. data/test/mab_files/test_signature/PAD01.000321365.PRIMO.xml +343 -0
  112. data/test/mab_files/test_signature/PAD01.000636652.PRIMO.xml +217 -0
  113. data/test/mab_files/test_signature/PAD01.000857994.PRIMO.xml +187 -0
  114. data/test/mab_files/test_signature/PAD01.000859176.PRIMO.xml +559 -0
  115. data/test/mab_files/test_signature/PAD01.000969442.PRIMO.xml +210 -0
  116. data/test/mab_files/test_signature/PAD01.001006945.PRIMO.xml +574 -0
  117. data/test/mab_files/test_signature_search/PAD01.000161445.PRIMO.xml +149 -0
  118. data/test/mab_files/test_signature_search/PAD01.000321365.PRIMO.xml +343 -0
  119. data/test/mab_files/test_signature_search/PAD01.000636652.PRIMO.xml +217 -0
  120. data/test/mab_files/test_signature_search/PAD01.000857994.PRIMO.xml +187 -0
  121. data/test/mab_files/test_signature_search/PAD01.000859176.PRIMO.xml +559 -0
  122. data/test/mab_files/test_signature_search/PAD01.000969442.PRIMO.xml +210 -0
  123. data/test/mab_files/test_signature_search/PAD01.001006945.PRIMO.xml +574 -0
  124. data/test/mab_files/test_status/PAD01.000321365.PRIMO.xml +343 -0
  125. data/test/mab_files/test_status/PAD01.000392641.PRIMO.xml +4337 -0
  126. data/test/mab_files/test_status/detmold_1.xml +17 -0
  127. data/test/mab_files/test_status/detmold_2.xml +17 -0
  128. data/test/mab_files/test_status/detmold_3.xml +12 -0
  129. data/test/mab_files/test_subject/PAD01.000972511.PRIMO.xml +406 -0
  130. data/test/mab_files/test_suborders/PAD01.000057960.PRIMO.xml +1069 -0
  131. data/test/mab_files/test_suborders/PAD01.000058000.PRIMO.xml +995 -0
  132. data/test/mab_files/test_suborders/PAD01.000215104.PRIMO.xml +191 -0
  133. data/test/mab_files/test_suborders/PAD01.000310864.PRIMO.xml +999 -0
  134. data/test/mab_files/test_suborders/PAD01.000392641.PRIMO.xml +4334 -0
  135. data/test/mab_files/test_suborders/PAD01.000392645.PRIMO.xml +4094 -0
  136. data/test/mab_files/test_suborders/PAD01.000438377.PRIMO.xml +232 -0
  137. data/test/mab_files/test_suborders/PAD01.000479391.PRIMO.xml +142 -0
  138. data/test/mab_files/test_suborders/PAD01.000637121.PRIMO.xml +805 -0
  139. data/test/mab_files/test_suborders/PAD01.000676616.PRIMO.xml +128 -0
  140. data/test/mab_files/test_suborders/PAD01.001006945.PRIMO.xml +574 -0
  141. data/test/mab_files/test_suborders/PAD01.001015067.PRIMO.xml +137 -0
  142. data/test/mab_files/test_suborders/PAD01.001015068.PRIMO.xml +216 -0
  143. data/test/mab_files/test_suborders/PAD01.001015070.PRIMO.xml +212 -0
  144. data/test/mab_files/test_suborders/PAD01.001108212.PRIMO.xml +259 -0
  145. data/test/mab_files/test_suborders/PAD01.001499877.PRIMO.xml +227 -0
  146. data/test/mab_files/test_suborders/PAD01.001499879.PRIMO.xml +255 -0
  147. data/test/mab_files/test_suborders/PAD01.001499880.PRIMO.xml +279 -0
  148. data/test/mab_files/test_suborders/PAD01.001562173.PRIMO.xml +116 -0
  149. data/test/mab_files/test_suborders/PAD01.001572048.PRIMO.xml +68 -0
  150. data/test/mab_files/test_suborders/PAD01.001572049.PRIMO.xml +133 -0
  151. data/test/mab_files/test_superorder/PAD01.000806191.PRIMO.xml +216 -0
  152. data/test/mab_files/test_superorder/PAD01.000844686.PRIMO.xml +584 -0
  153. data/test/mab_files/test_superorder/PAD01.001015067.PRIMO.xml +137 -0
  154. data/test/mab_files/test_superorder/PAD01.001452439.PRIMO.xml +377 -0
  155. data/test/mab_files/test_superorder_display/PAD01.000000872.PRIMO.xml +227 -0
  156. data/test/mab_files/test_superorder_display/PAD01.000160412.PRIMO.xml +518 -0
  157. data/test/mab_files/test_superorder_display/PAD01.000162669.PRIMO.xml +198 -0
  158. data/test/mab_files/test_superorder_display/PAD01.000178500.PRIMO.xml +158 -0
  159. data/test/mab_files/test_superorder_display/PAD01.000297043.PRIMO.xml +154 -0
  160. data/test/mab_files/test_superorder_display/PAD01.000562878.PRIMO.xml +1214 -0
  161. data/test/mab_files/test_superorder_display/PAD01.000958473.PRIMO.xml +379 -0
  162. data/test/mab_files/test_superorder_display/PAD01.001006945.PRIMO.xml +574 -0
  163. data/test/mab_files/test_superorders/PAD01.000057960.PRIMO.xml +1069 -0
  164. data/test/mab_files/test_superorders/PAD01.000215104.PRIMO.xml +191 -0
  165. data/test/mab_files/test_superorders/PAD01.000310864.PRIMO.xml +999 -0
  166. data/test/mab_files/test_superorders/PAD01.000392641.PRIMO.xml +4334 -0
  167. data/test/mab_files/test_superorders/PAD01.000438377.PRIMO.xml +232 -0
  168. data/test/mab_files/test_superorders/PAD01.000479391.PRIMO.xml +142 -0
  169. data/test/mab_files/test_superorders/PAD01.000637121.PRIMO.xml +805 -0
  170. data/test/mab_files/test_superorders/PAD01.001015067.PRIMO.xml +137 -0
  171. data/test/mab_files/test_superorders/PAD01.001499877.PRIMO.xml +227 -0
  172. data/test/mab_files/test_superorders/PAD01.001572048.PRIMO.xml +68 -0
  173. data/test/mab_files/test_title_display/PAD01.000954111.PRIMO.xml +162 -0
  174. data/test/mab_files/test_title_display/PAD01.000992332.PRIMO.xml +189 -0
  175. data/test/mab_files/test_title_display/PAD01.001015068.PRIMO.xml +216 -0
  176. data/test/mab_files/test_title_display/PAD01.001499879.PRIMO.xml +255 -0
  177. data/test/mab_files/test_title_search/test_1.xml +20 -0
  178. data/test/mab_files/test_title_sort/PAD01.000954111.PRIMO.xml +162 -0
  179. data/test/mab_files/test_title_sort/PAD01.000992332.PRIMO.xml +189 -0
  180. data/test/mab_files/test_volume_count_sort/PAD01.001015068.PRIMO.xml +216 -0
  181. data/test/mab_files/test_volume_count_sort/PAD01.001499879.PRIMO.xml +255 -0
  182. data/test/mabmapper/test_creation_date.rb +5 -0
  183. data/test/mabmapper/test_creationdate.rb +23 -0
  184. data/test/mabmapper/test_creator_contributor_facet.rb +4 -0
  185. data/test/mabmapper/test_description.rb +9 -0
  186. data/test/mabmapper/test_doc.rb +6 -0
  187. data/test/mabmapper/test_edition.rb +8 -0
  188. data/test/mabmapper/test_erscheinungsform.rb +5 -0
  189. data/test/mabmapper/test_ht_number.rb +4 -0
  190. data/test/mabmapper/test_inhaltstyp.rb +5 -0
  191. data/test/mabmapper/test_is_secondary_form.rb +7 -0
  192. data/test/mabmapper/test_is_suborder.rb +7 -0
  193. data/test/mabmapper/test_issn.rb +4 -0
  194. data/test/mabmapper/test_materialtyp.rb +5 -0
  195. data/test/mabmapper/test_notation_sort.rb +5 -0
  196. data/test/mabmapper/test_publisher.rb +5 -0
  197. data/test/mabmapper/test_redactional_remark.rb +4 -0
  198. data/test/mabmapper/test_relation.rb +16 -0
  199. data/test/mabmapper/test_secondary_form_creationdate.rb +6 -0
  200. data/test/mabmapper/test_secondary_form_isbn.rb +6 -0
  201. data/test/mabmapper/test_secondary_form_physical_description.rb +5 -0
  202. data/test/mabmapper/test_secondary_form_preliminary_phrase.rb +5 -0
  203. data/test/mabmapper/test_secondary_form_publisher.rb +5 -0
  204. data/test/mabmapper/test_secondary_form_superorder.rb +9 -0
  205. data/test/mabmapper/test_short_title_display.rb +27 -0
  206. data/test/mabmapper/test_signature.rb +12 -0
  207. data/test/mabmapper/test_signature_search.rb +12 -0
  208. data/test/mabmapper/test_status.rb +13 -0
  209. data/test/mabmapper/test_subject.rb +5 -0
  210. data/test/mabmapper/test_suborders.rb +192 -0
  211. data/test/mabmapper/test_superorder.rb +7 -0
  212. data/test/mabmapper/test_superorder_display.rb +22 -0
  213. data/test/mabmapper/test_superorders.rb +38 -0
  214. data/test/mabmapper/test_title_display.rb +12 -0
  215. data/test/mabmapper/test_title_search.rb +4 -0
  216. data/test/mabmapper/test_title_sort.rb +6 -0
  217. data/test/mabmapper/test_volume_count_sort.rb +5 -0
  218. data/test/test_helper.rb +53 -0
  219. data/test/test_mabmapper.rb +19 -0
  220. data/utils/mab_by_docid.sh +19 -0
  221. metadata +574 -0
@@ -0,0 +1,6 @@
1
+ # coding: utf-8
2
+ module TestSecondaryFormCreationdate
3
+ define_field_test '000806191', secondary_form_creationdate: nil
4
+ define_field_test '000844686', secondary_form_creationdate: '2001'
5
+ define_field_test '001452439', secondary_form_creationdate: '2012'
6
+ end
@@ -0,0 +1,6 @@
1
+ # coding: utf-8
2
+ module TestSecondaryFormIsbn
3
+ define_field_test '000806191', secondary_form_isbn: '3-8288-0675-9'
4
+ define_field_test '000844686', secondary_form_isbn: '3-8288-1141-8'
5
+ define_field_test '001452439', secondary_form_isbn: nil
6
+ end
@@ -0,0 +1,5 @@
1
+ # coding: utf-8
2
+ module TestSecondaryFormPhysicalDescription
3
+ define_field_test '000806191', secondary_form_physical_description: '2 Mikrofiches : 24x'
4
+ define_field_test '001452439', secondary_form_physical_description: nil
5
+ end
@@ -0,0 +1,5 @@
1
+ # coding: utf-8
2
+ module TestSecondaryFormPreliminaryPhrase
3
+ define_field_test '000806191', secondary_form_preliminary_phrase: 'Mikrofiche-Ausg.:'
4
+ define_field_test '001452439', secondary_form_preliminary_phrase: 'Digitalisierte Ausg.'
5
+ end
@@ -0,0 +1,5 @@
1
+ # coding: utf-8
2
+ module TestSecondaryFormPublisher
3
+ define_field_test '000806191', secondary_form_publisher: 'Marburg : Tectum-Verl.'
4
+ define_field_test '001452439', secondary_form_publisher: 'Paderborn : Universitätsbibliothek Paderborn'
5
+ end
@@ -0,0 +1,9 @@
1
+ # coding: utf-8
2
+ module TestSecondaryFormSuperorder
3
+ define_field_test '000806191', secondary_form_superorder: {"ht_number" => "HT006670284","label" => "Edition Wissenschaft : Reihe Chemie ; 240","volume_count" => "240"}
4
+ define_field_test '000977734', secondary_form_superorder: [
5
+ {"ht_number"=>nil, "label"=>"Zeitschriften der HAAB Weimar. Projekt Sicherungsverfilmung der HAAB Weimar", "volume_count"=>nil},
6
+ {"ht_number"=>nil, "label"=>"Faustsammlung der HAAB Weimar", "volume_count"=>nil}
7
+ ]
8
+ define_field_test '001452439', secondary_form_superorder: {"ht_number" => nil, "label" => "Digitale Sammlungen der Universitätsbibliothek Paderborn", "volume_count" => nil}
9
+ end
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ module TestShortTitleDisplay
3
+ define_field_test '000057960', short_title_display: 'Sämtliche Werke : historisch-kritische Ausgabe'
4
+ define_field_test '000058000', short_title_display: 'Tannhäuser'
5
+ define_field_test '000215104', short_title_display: 'Hiersemanns bibliographische Handbücher'
6
+ define_field_test '000310864', short_title_display: 'Paderborner Universitätsreden'
7
+ define_field_test '000392641', short_title_display: 'Hegel-Jahrbuch'
8
+ define_field_test '000392645', short_title_display: 'Hegel-Jahrbuch'
9
+ define_field_test '000438377', short_title_display: 'Urkundenregesten zur Tätigkeit des deutschen Königs- und Hofgerichts bis 1451'
10
+ define_field_test '000479391', short_title_display: 'Quellen und Forschungen zur höchsten Gerichtsbarkeit im alten Reich'
11
+ define_field_test '000637121', short_title_display: 'Zeitschrift für Familienforschung : ZfF'
12
+ define_field_test '000676616', short_title_display: 'Recht und Staat'
13
+ define_field_test '000782994', short_title_display: 'Rot'
14
+ define_field_test '001006945', short_title_display: 'Die Zeit Wenzels 1397 - 1400'
15
+ define_field_test '001015067', short_title_display: 'Forum Geschichte kompakt'
16
+ define_field_test '001015070', short_title_display: 'Vom Ende des Ersten Weltkriegs bis zur Gegenwart [Schülerbd.]'
17
+ define_field_test '001108212', short_title_display: 'Johann Nestroy - Dokumente'
18
+ define_field_test '001249043', short_title_display: 'Fractions, decimals, ratios, and percents' # replace [Hauptbd.] with name of superorder
19
+ define_field_test '001499877', short_title_display: 'Fakten und Fiktionen : Werklexikon der deutschsprachigen Schlüsselliteratur ; 1900 - 2010'
20
+ define_field_test '001499879', short_title_display: 'Andres bis Loest'
21
+ define_field_test '001499880', short_title_display: 'Heinrich Mann bis Zwerenz'
22
+ define_field_test '001510878', short_title_display: 'Tanzhaus' # short title would be '[Buch]', so take the superorder title
23
+ define_field_test '001562173', short_title_display: 'Schwerpunktthema: Türkische Familien in Deutschland - Generationenbeziehungen und Generationenperspektiven'
24
+ define_field_test '001568334', short_title_display: 'Software Engineering 2013' # short title would be 'Buch', so take the superorder title
25
+ define_field_test '001572048', short_title_display: 'Paderborner Rathaus-Vorlesungen'
26
+ define_field_test '001572049', short_title_display: 'Alte und Neue Welt : zur wechselvollen Geschichte transatlanitischer Kulturkontakte'
27
+ end
@@ -0,0 +1,12 @@
1
+ # coding: utf-8
2
+ module TestSignature
3
+ define_field_test '000321365', signature: 'P10/34k12'
4
+ define_field_test '000636652', signature: 'P10/34m3'
5
+ define_field_test '000857994', signature: 'P10/34t26'
6
+ define_field_test '000859176', signature: 'KDVD1105'
7
+ define_field_test '000969442', signature: 'TWR12765'
8
+
9
+ # Signature with appended volume numbering
10
+ define_field_test '000161445', signature: 'KXW4113-80/81'
11
+ define_field_test '001006945', signature: 'LKL2468-14'
12
+ end
@@ -0,0 +1,12 @@
1
+ # coding: utf-8
2
+ module TestSignatureSearch
3
+ define_field_test '000321365', signature_search: ["34K12", "P10/34K12"]
4
+ define_field_test '000636652', signature_search: ["P10/34M3", "34M3"]
5
+ define_field_test '000857994', signature_search: ["34T26", "P10/34T26"]
6
+ define_field_test '000859176', signature_search: "KDVD1105"
7
+ define_field_test '000969442', signature_search: ["TWR12765+4", "TWR12765", "TWR12765+1", "TWR12765+2", "TWR12765+3"]
8
+
9
+ # Signature with appended volume numbering
10
+ define_field_test '000161445', signature_search: ["KXW4113-80/81", "KXW4113-80", "KXW4113"]
11
+ define_field_test '001006945', signature_search: ["LKL2468-14", "LKL2468"]
12
+ end
@@ -0,0 +1,13 @@
1
+ # coding: utf-8
2
+ module TestStatus
3
+ # deleted -> LDR position 6 == 'd'
4
+ define_field_test '000321365', status: 'D'
5
+
6
+ # withdrawn (discarded) via field 078
7
+ define_field_test '000392641', status: 'D'
8
+
9
+ # suppress records located in Detmold
10
+ define_field_test 'detmold_1', status: 'D'
11
+ define_field_test 'detmold_2', status: 'A'
12
+ define_field_test 'detmold_3', status: 'A'
13
+ end
@@ -0,0 +1,5 @@
1
+ # coding: utf-8
2
+ module TestSubject
3
+ # << ... >> should be removed
4
+ define_field_test '000972511', subject: ["Vischer, Friedrich Theodor von", "Faust"] # Vischer, Friedrich Theodor <<von>>
5
+ end
@@ -0,0 +1,192 @@
1
+ # coding: utf-8
2
+ module TestSuborders
3
+ # case 1 (a <- b AND a <- c)
4
+ def test_001015068_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT015707971'); end
5
+ def test_001015068_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end
6
+ def test_001015068_has_superorder_display
7
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
8
+ ht_number: "HT015707971",
9
+ label: "Forum Geschichte kompakt",
10
+ volume_count: "Bd. 2, Teilbd. 1, Von der Frühen Neuzeit bis zum Ersten Weltkrieg [Schülerbd.]",
11
+ label_additions: nil
12
+ })
13
+ end
14
+
15
+ def test_001015070_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT015707971'); end
16
+ def test_001015070_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end
17
+ def test_001015070_has_superorder_display
18
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
19
+ ht_number: "HT015707971",
20
+ label: "Forum Geschichte kompakt",
21
+ volume_count: "Bd. 2, Teilbd. 2, Vom Ende des Ersten Weltkriegs bis zur Gegenwart [Schülerbd.]",
22
+ label_additions: nil
23
+ })
24
+ end
25
+
26
+ # case 2 (a <- b <- c)
27
+ def test_001499879_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT017055615'); end;
28
+ def test_001499879_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
29
+ def test_001499879_has_superorder_display
30
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
31
+ ht_number: "HT017055615",
32
+ label: "Fakten und Fiktionen",
33
+ volume_count: "1",
34
+ label_additions: nil
35
+ })
36
+ end
37
+
38
+ def test_001499880_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT017055615'); end;
39
+ def test_001499880_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
40
+ def test_001499880_has_superorder_display
41
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
42
+ ht_number: "HT017055615",
43
+ label: "Fakten und Fiktionen",
44
+ volume_count: "2",
45
+ label_additions: nil
46
+ })
47
+ end
48
+
49
+ def test_001499877_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT001260573'); end;
50
+ def test_001499877_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
51
+ def test_001499877_has_superorder_display
52
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
53
+ ht_number: "HT001260573",
54
+ label: "Hiersemanns bibliographische Handbücher",
55
+ volume_count: "21",
56
+ label_additions: nil
57
+ })
58
+ end
59
+
60
+ # case 3 (b <- a AND c <- a)
61
+ def test_001006945_has_superorders; assert_has_superorders(doc_id_from_method_name(__method__), ['HT002919097', 'HT003165994']); end;
62
+ def test_001006945_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
63
+ def test_001006945_has_superorder_display
64
+ assert_has_superorder_display_values(doc_id_from_method_name(__method__), [{
65
+ "ht_number" => "HT002919097",
66
+ "label" => "Urkundenregesten zur Tätigkeit des deutschen Königs- und Hofgerichts bis 1451",
67
+ "volume_count" => "14",
68
+ "label_additions" => nil
69
+ }, {
70
+ "ht_number" => "HT003165994",
71
+ "label" => "Quellen und Forschungen zur höchsten Gerichtsbarkeit im alten Reich",
72
+ "volume_count" => "14",
73
+ "label_additions" => ["Sonderreihe"]
74
+ }])
75
+ end
76
+
77
+ # case 4 (a <- b)
78
+ def test_001562173_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT005081594'); end;
79
+ def test_001562173_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
80
+ def test_001562173_has_superorder_display
81
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
82
+ "ht_number" => "HT005081594",
83
+ "label" => "Zeitschrift für Familienforschung",
84
+ "volume_count" => "25,1",
85
+ "label_additions" => nil
86
+ })
87
+ end
88
+
89
+ # case 5 (b <- a AND c <- a)
90
+ def test_001572049_has_superorders; assert_has_superorders(doc_id_from_method_name(__method__), ['HT002162049', 'HT017670666']); end;
91
+ def test_001572049_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
92
+ def test_001572049_has_superorder_display
93
+ assert_has_superorder_display_values(doc_id_from_method_name(__method__), [{
94
+ "ht_number" => "HT002162049",
95
+ "label" => "Paderborner Universitätsreden",
96
+ "volume_count" => "127",
97
+ "label_additions" => nil
98
+ }, {
99
+ "ht_number" => "HT017670666",
100
+ "label" => "Paderborner Rathaus-Vorlesungen",
101
+ "volume_count" => "1",
102
+ "label_additions" => nil
103
+ }])
104
+ end
105
+
106
+ # case 6 (a <- b AND a <- c)
107
+ def test_000676616_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT002672902'); end;
108
+ def test_000676616_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
109
+ def test_000676616_has_superorder_display
110
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
111
+ "ht_number" => "HT002672902",
112
+ "label" => "Hegel-Jahrbuch",
113
+ "volume_count" => "1993/94",
114
+ "label_additions" => nil
115
+ })
116
+ end
117
+
118
+ def test_000392645_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT002672902'); end;
119
+ def test_000392645_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
120
+ def test_000392645_has_superorder_display
121
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
122
+ "ht_number" => "HT002672902",
123
+ "label" => "Hegel-Jahrbuch",
124
+ "volume_count" => "1966",
125
+ "label_additions" => nil
126
+ })
127
+ end
128
+
129
+ # case 7 (a <- b AND a <- c)
130
+ def test_000058000_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT000289652'); end;
131
+ def test_000058000_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
132
+ def test_000058000_has_superorder_display
133
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
134
+ "ht_number" => "HT000289652",
135
+ "label" => "Sämtliche Werke",
136
+ "volume_count" => "Stücke 36",
137
+ "label_additions" => nil
138
+ })
139
+ end
140
+
141
+ def test_001108212_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT000289652'); end;
142
+ def test_001108212_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
143
+ def test_001108212_has_superorder_display
144
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
145
+ "ht_number" => "HT000289652",
146
+ "label" => "Sämtliche Werke / Johann Nestroy",
147
+ "volume_count" => "",
148
+ "label_additions" => nil
149
+ })
150
+ end
151
+
152
+ private
153
+
154
+ def assert_has_superorder(doc_id, superorder_id)
155
+ assert_has_superorders(doc_id, [superorder_id])
156
+ end
157
+
158
+ def assert_has_superorder_display_value(doc_id, superorder_display_values)
159
+ assert_has_superorder_display_values(doc_id, [superorder_display_values])
160
+ end
161
+
162
+ def assert_has_superorder_display_values(doc_id, superorder_display_values)
163
+ doc = Nokogiri::XML(@engine.process('', load_mab('test_suborders', doc_id)).to_xml)
164
+ superorder_display_nodes = doc.css('superorder_display')
165
+
166
+ superorder_display_values.each_with_index do |superorder_display_value, index|
167
+ assert_equal(superorder_display_value.stringify_keys, JSON.parse(superorder_display_nodes[index].text))
168
+ end
169
+ end
170
+
171
+ def assert_has_superorders(doc_id, superorder_ids)
172
+ doc = Nokogiri::XML(@engine.process('', load_mab('test_suborders', doc_id)).to_xml)
173
+ superorders = doc.css('superorder')
174
+
175
+ assert_equal(true, superorders.present?)
176
+ assert_equal(superorder_ids.length, superorders.length)
177
+
178
+ superorder_ids.each do |superorder_id|
179
+ assert_includes(superorders.map(&:text), superorder_id)
180
+ end
181
+ end
182
+
183
+ def assert_is_suborder(doc_id)
184
+ doc = Nokogiri::XML(@engine.process('', load_mab('test_suborders', doc_id)).to_xml)
185
+ assert_equal(true, (value = doc.css('is_suborder').text).present?)
186
+ assert_equal('true', value)
187
+ end
188
+
189
+ def doc_id_from_method_name(method_name)
190
+ method_name[/\d+/]
191
+ end
192
+ end
@@ -0,0 +1,7 @@
1
+ # coding: utf-8
2
+ module TestSuperorder
3
+ # check for superorders of secondary forms (mab 623 and 629)
4
+ define_field_test '000806191', superorder: 'HT006670284'
5
+ define_field_test '000844686', superorder: 'HT007082773'
6
+ define_field_test '001452439', superorder: nil
7
+ end
@@ -0,0 +1,22 @@
1
+ # coding: utf-8
2
# Expected +superorder_display+ values, one field test per record id.
module TestSuperorderDisplay
  # Builds one expected superorder_display entry with string keys.
  entry = lambda do |ht_number, label, volume_count, label_additions = nil|
    {
      "ht_number" => ht_number,
      "label" => label,
      "volume_count" => volume_count,
      "label_additions" => label_additions
    }
  end

  # superorder label with leading '...' but without ';' or ':' between dots and label
  define_field_test '000000872', superorder_display: [
    entry.call("HT001310809", "Wissenschaftliche Tagung der Arbeitsgemeinschaft für Klinische Diätetik", "2"),
    entry.call("HT002182783", "Aktuelle Ernährungsmedizin", "[3], Suppl")
  ]

  # for the label, everything behind ':' should be removed
  define_field_test '000162669', superorder_display:
    entry.call("HT001231518", "Handbuch der Dogmengeschichte", "Bd. 1, Das Dasein im Glauben ; Fasz. 4")
  define_field_test '000178500', superorder_display:
    entry.call("HT001231518", "Handbuch der Dogmengeschichte", "Bd. 3, Christologie, Soteriologie, Ekklesiologie, Mariologie, Gnadenlehre ; Fasz.4")
  define_field_test '000297043', superorder_display:
    entry.call("HT001231518", "Handbuch der Dogmengeschichte", "Bd. 4, Sakramente, Eschatologie ; Fasz. 1")

  # << ... >> should be removed
  define_field_test '000958473', superorder_display: [
    entry.call("HT001231617", "Hessische Forschungen", "47"),
    entry.call("HT003779625", "Die Geschichte unserer Heimat", "45")
  ]

  # label additions
  define_field_test '000160412', superorder_display:
    entry.call("HT001237362", "Historische Zeitschrift", "[N.F.],1", ["Beiheft"])
  define_field_test '001006945', superorder_display: [
    entry.call("HT002919097", "Urkundenregesten zur Tätigkeit des deutschen Königs- und Hofgerichts bis 1451", "14"),
    entry.call("HT003165994", "Quellen und Forschungen zur höchsten Gerichtsbarkeit im alten Reich", "14", ["Sonderreihe"])
  ]

  # sometimes there are multiple 451 fields with one starting with '...';
  # if this is the case, try the other one
  define_field_test '000562878', superorder_display: [
    entry.call("HT003809808", "Management & marketing dictionary", "1"),
    entry.call("HT002100889", "dtv", "5815 : Beck-Wirtschaftsberater")
  ]
end
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
# Defines one `test_<doc_id>_is_superorder` method per listed record; each
# asserts that the processed record is flagged as a superorder.
module TestSuperorders
  # Record ids, grouped by the numbered cases of the original suite.
  {
    'case 1' => %w[001015067],
    'case 2' => %w[001499877 000215104],
    'case 3' => %w[000438377 000479391],
    'case 4' => %w[000637121],
    'case 5' => %w[000310864 001572048],
    'case 6' => %w[000392641],
    'case 7' => %w[000057960]
  }.each_value do |doc_ids|
    doc_ids.each do |doc_id|
      test_name = :"test_#{doc_id}_is_superorder"
      define_method(test_name) { assert_is_superorder(doc_id_from_method_name(test_name)) }
    end
  end

  private

  # Processes the 'test_superorders' fixture for +doc_id+ and asserts that
  # the text of the rendered <is_superorder> node is present and equals 'true'.
  def assert_is_superorder(doc_id)
    mab_xml = load_mab('test_superorders', doc_id)
    doc = Nokogiri::XML(@engine.process('', mab_xml).to_xml)
    value = doc.css('is_superorder').text

    assert_equal(true, value.present?)
    assert_equal('true', value)
  end

  # Returns the first run of digits in +method_name+, or nil if there is none.
  def doc_id_from_method_name(method_name)
    method_name.slice(/\d+/)
  end
end
@@ -0,0 +1,12 @@
1
+ # coding: utf-8
2
# Expected +title_display+ values, keyed by record id.
module TestTitleDisplay
  {
    # should insert 'Bd.'
    '001499879' => 'Fakten und Fiktionen : Werklexikon der deutschsprachigen Schlüsselliteratur ; 1900 - 2010. Bd. 1. Andres bis Loest',

    # should not insert 'Bd.'
    '001015068' => 'Forum Geschichte kompakt. Bd. 2, Teilbd. 1, Von der Frühen Neuzeit bis zum Ersten Weltkrieg [Schülerbd.]',

    # '<<' and '>>' should be removed
    '000954111' => 'Kitakantō-igaku = The Kitakanto medical journal',
    '000992332' => 'Der Hexenbürgermeister von Lemgo : ein Lesedrama in Versen'
  }.each do |doc_id, expected_title|
    define_field_test doc_id, title_display: expected_title
  end
end
@@ -0,0 +1,4 @@
1
+ # coding: utf-8
2
# Field test for the fixture record 'test_1', which is expected to yield
# the three values 'AAA', 'XXX' and 'YYY'.
module TestTitleSearch
  define_field_test('test_1', title: %w[AAA XXX YYY])
end
@@ -0,0 +1,6 @@
1
+ # coding: utf-8
2
# Expected +title_sort+ values, keyed by record id.
module TestTitleSort
  {
    # << ... >> should be removed
    '000954111' => 'Kitakantō-igaku = Kitakanto medical journal',
    '000992332' => 'Hexenbürgermeister von Lemgo : ein Lesedrama in Versen'
  }.each do |doc_id, expected_sort_title|
    define_field_test doc_id, title_sort: expected_sort_title
  end
end
@@ -0,0 +1,5 @@
1
+ # coding: utf-8
2
# Expected +volume_count_sort+ values, keyed by record id.
module TestVolumeCountSort
  {
    '001015068' => '0002,x,1,schuel',
    '001499879' => '000000000000001'
  }.each do |doc_id, expected_sort_key|
    define_field_test doc_id, volume_count_sort: expected_sort_key
  end
end
@@ -0,0 +1,53 @@
1
+ # doesn't feel right to mess with $LOAD_PATH, but cannot require mabmapper without it
2
+ $LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))
3
+
4
+ # silence that minitest related error about minitest/autorun (since minitest version >= 5)
5
+ gem 'minitest'
6
+
7
+ require 'mabmapper'
8
+ require 'minitest/autorun'
9
+ require 'nokogiri'
10
+ require 'pry'
11
+
12
+ # Extending Ruby's Module class simplifies tests massively
13
class Module
  # Defines an instance method `test_<field>_for_<doc_id>` on the receiving
  # module that calls #assert_doc_field_value_equal.
  #
  # The field name is derived from the module's own name: a leading "Test"
  # is stripped and the rest is underscored (TestTitleSort -> :title_sort).
  # Only the first *value* of +assertion_options+ is used, as the expected
  # value; its key is not consulted.
  #
  # doc_id            - record id, passed through to the assertion
  # assertion_options - Hash whose first value is the expected field value
  def define_field_test(doc_id, assertion_options)
    expected_value = assertion_options.values.first
    field_name = to_s.gsub(/\ATest/, '').underscore.to_sym
    test_name = "test_#{field_name}_for_#{doc_id}"

    define_method(test_name) { assert_doc_field_value_equal(doc_id, field_name, expected_value) }
  end
end
23
+
24
# Requires the engine file and returns a new instance of the engine class
# whose name is derived from the file path via `classify.constantize`.
#
# If the engine (or anything it loads) raises LoadError, the reason is
# written to stderr and the process exits with status 1.
def load_engine!
  engine_file = "mabmapper/aleph_mab_xml_engine" # TODO: Make me configurable
  require engine_file
  engine_file.classify.constantize.new
rescue LoadError => e
  # Say why the run is being aborted instead of exiting silently.
  warn "Could not load engine '#{engine_file}': #{e.message}"
  exit 1
end
33
+
34
# Processes the fixture for +doc_id+ from the "test_<field_name>" fixture
# directory and compares the rendered values of +field_name+ with
# +expected_value+.
#
# Node texts wrapped in '{' ... '}' are parsed as JSON before comparison.
# The actual value is:
#   * nil when no node was rendered,
#   * the single value (or nil when it is blank) for exactly one node,
#   * the Array of all values otherwise.
#
# doc_id         - record id used to locate the fixture
# field_name     - Symbol/String naming both the fixture directory and the node
# expected_value - expected value; nil means "no (non-blank) value expected"
def assert_doc_field_value_equal(doc_id, field_name, expected_value)
  doc = Nokogiri::XML(@engine.process('', load_mab("test_#{field_name}", doc_id)).to_xml)

  field_values = doc.css(field_name.to_s).map do |node|
    text = node.text
    text.start_with?('{') && text.end_with?('}') ? JSON.parse(text) : text
  end

  actual_value =
    case field_values.length
    when 0 then nil
    when 1 then field_values.first.presence
    else field_values
    end

  # assert_equal with a nil expectation is deprecated in Minitest 5, so nil
  # expectations go through assert_nil.
  if expected_value.nil?
    assert_nil actual_value
  else
    assert_equal expected_value, actual_value
  end
end
46
+
47
# Reads a MAB XML fixture and returns its content as a String.
#
# Two file names are tried, in this order, inside
# "mab_files/<directory_name>/" (resolved relative to this file):
#   1. PAD01.<record_id>.PRIMO.xml
#   2. <record_id>.xml
#
# directory_name - fixture sub-directory
# record_id      - record id that the file name is built from
#
# Raises Errno::ENOENT, listing the paths that were tried, when neither
# file exists.
def load_mab(directory_name, record_id)
  candidates = [
    "../mab_files/#{directory_name}/PAD01.#{record_id}.PRIMO.xml",
    "../mab_files/#{directory_name}/#{record_id}.xml"
  ].map { |path| File.expand_path(path, __FILE__) }

  mab_file_name = candidates.find { |path| File.exist?(path) }

  # Without this guard a missing fixture surfaces as File.read(nil) and an
  # unhelpful TypeError.
  raise Errno::ENOENT, "no MAB fixture for record #{record_id}, tried: #{candidates.join(', ')}" if mab_file_name.nil?

  File.read(mab_file_name)
end
@@ -0,0 +1,19 @@
1
+ require File.expand_path '../test_helper.rb', __FILE__
2
+
3
# Test case that the test modules are mixed into. Uses Minitest::Test when
# it is available (minitest >= 5) and falls back to the legacy
# MiniTest::Unit::TestCase otherwise.
class TestMabmapper < (defined?(Minitest::Test) ? Minitest::Test : MiniTest::Unit::TestCase)
  # Loads the engine before every test and keeps it in @engine.
  def setup
    @engine = load_engine!
  end
end
8
+
9
# Optional whitespace-separated list of test names in ENV['TESTS'];
# nil means "run everything".
tests_to_run = ENV['TESTS'].nil? ? nil : ENV['TESTS'].split

# require and include all (or only the selected) mabmapper test modules
Dir.glob(File.expand_path('../mabmapper/test_*.rb', __FILE__)).each do |filename|
  module_file = File.basename(filename, '.rb')
  selected = tests_to_run.nil? || tests_to_run.any? { |name_of_test| "test_#{name_of_test}" == module_file }
  next unless selected

  require filename
  TestMabmapper.send(:include, module_file.camelize.constantize)
end
@@ -0,0 +1,19 @@
1
#!/bin/bash
#
# Extracts a single mab/mabmapper xml file from a numbered archive
#
# Usage:
# ./mab_by_docid.sh 000636652 (call this from within the directory where the archives are located)
#
# Dependencies: bash, sed, find, head, tar, xmllint (from libxml)

# the doc id is mandatory and must consist of digits only
if [[ ! "$1" =~ ^[0-9]+$ ]]; then
  echo "Usage: $0 <docid> (digits only, e.g. 000636652)" >&2
  exit 1
fi

docid_with_zeros="$1"
# strip leading zeros: bash arithmetic treats numbers with a leading 0 as octal
docid_without_zeros="$(echo "$docid_with_zeros" | sed 's/^0*//')"
# an all-zero id strips down to an empty string, so default it to 0
archive_number="$(( (${docid_without_zeros:-0} / 50000) + 1 ))"
# use the first match only; several matches would otherwise be passed to tar as member names
archive_file_name="$(find . -name "aleph.PRIMO.*.$archive_number.tar.gz" | head -n 1)"
mab_file_name="PAD01.$docid_with_zeros.PRIMO.xml"

if [ -z "$archive_file_name" ]; then
  echo "No archive matching aleph.PRIMO.*.$archive_number.tar.gz found below $(pwd)" >&2
  exit 1
fi

# grab the file from the tar.gz
tar -xvzf "$archive_file_name" "$mab_file_name" || exit 1

# format output using xmllint
xmllint --format "$mab_file_name" --output "$mab_file_name"