mabmapper 1.0.0.pre15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +20 -0
  3. data/.travis.yml +4 -0
  4. data/Gemfile +7 -0
  5. data/LICENSE +22 -0
  6. data/README.md +49 -0
  7. data/Rakefile +29 -0
  8. data/bin/mabmapper +3 -0
  9. data/lib/mabmapper/aleph_mab_xml_engine.rb +1050 -0
  10. data/lib/mabmapper/cli.rb +216 -0
  11. data/lib/mabmapper/elasticsearch_writer.rb +52 -0
  12. data/lib/mabmapper/engine.rb +112 -0
  13. data/lib/mabmapper/mab_xml/document.rb +53 -0
  14. data/lib/mabmapper/mab_xml/field.rb +43 -0
  15. data/lib/mabmapper/mab_xml/query.rb +25 -0
  16. data/lib/mabmapper/mab_xml/query_helper.rb +101 -0
  17. data/lib/mabmapper/mab_xml/result_set.rb +34 -0
  18. data/lib/mabmapper/mab_xml/subfield.rb +12 -0
  19. data/lib/mabmapper/mab_xml.rb +6 -0
  20. data/lib/mabmapper/tar_writer.rb +29 -0
  21. data/lib/mabmapper/version.rb +3 -0
  22. data/lib/mabmapper.rb +11 -0
  23. data/mabmapper.gemspec +33 -0
  24. data/test/mab_files/test_creation_date/test1.xml +17 -0
  25. data/test/mab_files/test_creation_date/test2.xml +17 -0
  26. data/test/mab_files/test_creationdate/425_a_1.xml +17 -0
  27. data/test/mab_files/test_creationdate/425_a_2.xml +19 -0
  28. data/test/mab_files/test_creationdate/425_bc_1.xml +19 -0
  29. data/test/mab_files/test_creationdate/425_bc_2.xml +22 -0
  30. data/test/mab_files/test_creationdate/425_bc_3.xml +22 -0
  31. data/test/mab_files/test_creationdate/425_bc_4.xml +19 -0
  32. data/test/mab_files/test_creationdate/425_p_1.xml +19 -0
  33. data/test/mab_files/test_creationdate/425_p_2.xml +17 -0
  34. data/test/mab_files/test_creationdate/595_1.xml +20 -0
  35. data/test/mab_files/test_creator_contributor_facet/PAD01.001006945.PRIMO.xml +574 -0
  36. data/test/mab_files/test_description/405.xml +22 -0
  37. data/test/mab_files/test_description/501-519.xml +30 -0
  38. data/test/mab_files/test_description/522.xml +22 -0
  39. data/test/mab_files/test_description/523.xml +22 -0
  40. data/test/mab_files/test_description/536-537.xml +30 -0
  41. data/test/mab_files/test_doc/PAD01.001510737.PRIMO.xml +317 -0
  42. data/test/mab_files/test_edition/PAD01.000844686.PRIMO.xml +584 -0
  43. data/test/mab_files/test_edition/PAD01.000969531.PRIMO.xml +129 -0
  44. data/test/mab_files/test_edition/PAD01.000969710.PRIMO.xml +144 -0
  45. data/test/mab_files/test_edition/PAD01.000978033.PRIMO.xml +163 -0
  46. data/test/mab_files/test_edition/PAD01.000990520.PRIMO.xml +163 -0
  47. data/test/mab_files/test_erscheinungsform/PAD01.000870753.PRIMO.xml +256 -0
  48. data/test/mab_files/test_erscheinungsform/PAD01.000870755.PRIMO.xml +467 -0
  49. data/test/mab_files/test_ht_number/PAD01.001015067.PRIMO.xml +137 -0
  50. data/test/mab_files/test_inhaltstyp/PAD01.000870753.PRIMO.xml +256 -0
  51. data/test/mab_files/test_inhaltstyp/PAD01.000870755.PRIMO.xml +467 -0
  52. data/test/mab_files/test_is_secondary_form/PAD01.000806191.PRIMO.xml +216 -0
  53. data/test/mab_files/test_is_secondary_form/PAD01.000844686.PRIMO.xml +584 -0
  54. data/test/mab_files/test_is_secondary_form/PAD01.001015067.PRIMO.xml +137 -0
  55. data/test/mab_files/test_is_secondary_form/PAD01.001452439.PRIMO.xml +377 -0
  56. data/test/mab_files/test_is_suborder/PAD01.000806191.PRIMO.xml +216 -0
  57. data/test/mab_files/test_is_suborder/PAD01.000844686.PRIMO.xml +584 -0
  58. data/test/mab_files/test_is_suborder/PAD01.001452439.PRIMO.xml +377 -0
  59. data/test/mab_files/test_issn/PAD01.000637121.PRIMO.xml +805 -0
  60. data/test/mab_files/test_materialtyp/PAD01.000870753.PRIMO.xml +256 -0
  61. data/test/mab_files/test_materialtyp/PAD01.000870755.PRIMO.xml +467 -0
  62. data/test/mab_files/test_notation_sort/PAD01.000970649.PRIMO.xml +306 -0
  63. data/test/mab_files/test_notation_sort/PAD01.001006944.PRIMO.xml +279 -0
  64. data/test/mab_files/test_publisher/PAD01.000312406.PRIMO.xml +1043 -0
  65. data/test/mab_files/test_redactional_remark/PAD01.001510737.PRIMO.xml +317 -0
  66. data/test/mab_files/test_relation/PAD01.000438377.PRIMO.xml +232 -0
  67. data/test/mab_files/test_relation/PAD01.000637121.PRIMO.xml +810 -0
  68. data/test/mab_files/test_relation/PAD01.000806191.PRIMO.xml +216 -0
  69. data/test/mab_files/test_relation/PAD01.000844686.PRIMO.xml +584 -0
  70. data/test/mab_files/test_relation/PAD01.001452439.PRIMO.xml +377 -0
  71. data/test/mab_files/test_secondary_form_creationdate/PAD01.000806191.PRIMO.xml +216 -0
  72. data/test/mab_files/test_secondary_form_creationdate/PAD01.000844686.PRIMO.xml +584 -0
  73. data/test/mab_files/test_secondary_form_creationdate/PAD01.001452439.PRIMO.xml +377 -0
  74. data/test/mab_files/test_secondary_form_isbn/PAD01.000806191.PRIMO.xml +216 -0
  75. data/test/mab_files/test_secondary_form_isbn/PAD01.000844686.PRIMO.xml +584 -0
  76. data/test/mab_files/test_secondary_form_isbn/PAD01.001452439.PRIMO.xml +377 -0
  77. data/test/mab_files/test_secondary_form_physical_description/PAD01.000806191.PRIMO.xml +216 -0
  78. data/test/mab_files/test_secondary_form_physical_description/PAD01.001452439.PRIMO.xml +377 -0
  79. data/test/mab_files/test_secondary_form_preliminary_phrase/PAD01.000806191.PRIMO.xml +216 -0
  80. data/test/mab_files/test_secondary_form_preliminary_phrase/PAD01.001452439.PRIMO.xml +377 -0
  81. data/test/mab_files/test_secondary_form_publisher/PAD01.000806191.PRIMO.xml +216 -0
  82. data/test/mab_files/test_secondary_form_publisher/PAD01.001452439.PRIMO.xml +377 -0
  83. data/test/mab_files/test_secondary_form_superorder/PAD01.000806191.PRIMO.xml +216 -0
  84. data/test/mab_files/test_secondary_form_superorder/PAD01.000977734.PRIMO.xml +225 -0
  85. data/test/mab_files/test_secondary_form_superorder/PAD01.001452439.PRIMO.xml +377 -0
  86. data/test/mab_files/test_short_title_display/PAD01.000057960.PRIMO.xml +1069 -0
  87. data/test/mab_files/test_short_title_display/PAD01.000058000.PRIMO.xml +995 -0
  88. data/test/mab_files/test_short_title_display/PAD01.000215104.PRIMO.xml +191 -0
  89. data/test/mab_files/test_short_title_display/PAD01.000310864.PRIMO.xml +999 -0
  90. data/test/mab_files/test_short_title_display/PAD01.000392641.PRIMO.xml +4334 -0
  91. data/test/mab_files/test_short_title_display/PAD01.000392645.PRIMO.xml +4094 -0
  92. data/test/mab_files/test_short_title_display/PAD01.000438377.PRIMO.xml +232 -0
  93. data/test/mab_files/test_short_title_display/PAD01.000479391.PRIMO.xml +142 -0
  94. data/test/mab_files/test_short_title_display/PAD01.000637121.PRIMO.xml +805 -0
  95. data/test/mab_files/test_short_title_display/PAD01.000676616.PRIMO.xml +128 -0
  96. data/test/mab_files/test_short_title_display/PAD01.000782994.PRIMO.xml +169 -0
  97. data/test/mab_files/test_short_title_display/PAD01.001006945.PRIMO.xml +574 -0
  98. data/test/mab_files/test_short_title_display/PAD01.001015067.PRIMO.xml +137 -0
  99. data/test/mab_files/test_short_title_display/PAD01.001015070.PRIMO.xml +212 -0
  100. data/test/mab_files/test_short_title_display/PAD01.001108212.PRIMO.xml +259 -0
  101. data/test/mab_files/test_short_title_display/PAD01.001249043.PRIMO.xml +172 -0
  102. data/test/mab_files/test_short_title_display/PAD01.001499877.PRIMO.xml +227 -0
  103. data/test/mab_files/test_short_title_display/PAD01.001499879.PRIMO.xml +255 -0
  104. data/test/mab_files/test_short_title_display/PAD01.001499880.PRIMO.xml +279 -0
  105. data/test/mab_files/test_short_title_display/PAD01.001510878.PRIMO.xml +184 -0
  106. data/test/mab_files/test_short_title_display/PAD01.001562173.PRIMO.xml +116 -0
  107. data/test/mab_files/test_short_title_display/PAD01.001568334.PRIMO.xml +1840 -0
  108. data/test/mab_files/test_short_title_display/PAD01.001572048.PRIMO.xml +68 -0
  109. data/test/mab_files/test_short_title_display/PAD01.001572049.PRIMO.xml +133 -0
  110. data/test/mab_files/test_signature/PAD01.000161445.PRIMO.xml +149 -0
  111. data/test/mab_files/test_signature/PAD01.000321365.PRIMO.xml +343 -0
  112. data/test/mab_files/test_signature/PAD01.000636652.PRIMO.xml +217 -0
  113. data/test/mab_files/test_signature/PAD01.000857994.PRIMO.xml +187 -0
  114. data/test/mab_files/test_signature/PAD01.000859176.PRIMO.xml +559 -0
  115. data/test/mab_files/test_signature/PAD01.000969442.PRIMO.xml +210 -0
  116. data/test/mab_files/test_signature/PAD01.001006945.PRIMO.xml +574 -0
  117. data/test/mab_files/test_signature_search/PAD01.000161445.PRIMO.xml +149 -0
  118. data/test/mab_files/test_signature_search/PAD01.000321365.PRIMO.xml +343 -0
  119. data/test/mab_files/test_signature_search/PAD01.000636652.PRIMO.xml +217 -0
  120. data/test/mab_files/test_signature_search/PAD01.000857994.PRIMO.xml +187 -0
  121. data/test/mab_files/test_signature_search/PAD01.000859176.PRIMO.xml +559 -0
  122. data/test/mab_files/test_signature_search/PAD01.000969442.PRIMO.xml +210 -0
  123. data/test/mab_files/test_signature_search/PAD01.001006945.PRIMO.xml +574 -0
  124. data/test/mab_files/test_status/PAD01.000321365.PRIMO.xml +343 -0
  125. data/test/mab_files/test_status/PAD01.000392641.PRIMO.xml +4337 -0
  126. data/test/mab_files/test_status/detmold_1.xml +17 -0
  127. data/test/mab_files/test_status/detmold_2.xml +17 -0
  128. data/test/mab_files/test_status/detmold_3.xml +12 -0
  129. data/test/mab_files/test_subject/PAD01.000972511.PRIMO.xml +406 -0
  130. data/test/mab_files/test_suborders/PAD01.000057960.PRIMO.xml +1069 -0
  131. data/test/mab_files/test_suborders/PAD01.000058000.PRIMO.xml +995 -0
  132. data/test/mab_files/test_suborders/PAD01.000215104.PRIMO.xml +191 -0
  133. data/test/mab_files/test_suborders/PAD01.000310864.PRIMO.xml +999 -0
  134. data/test/mab_files/test_suborders/PAD01.000392641.PRIMO.xml +4334 -0
  135. data/test/mab_files/test_suborders/PAD01.000392645.PRIMO.xml +4094 -0
  136. data/test/mab_files/test_suborders/PAD01.000438377.PRIMO.xml +232 -0
  137. data/test/mab_files/test_suborders/PAD01.000479391.PRIMO.xml +142 -0
  138. data/test/mab_files/test_suborders/PAD01.000637121.PRIMO.xml +805 -0
  139. data/test/mab_files/test_suborders/PAD01.000676616.PRIMO.xml +128 -0
  140. data/test/mab_files/test_suborders/PAD01.001006945.PRIMO.xml +574 -0
  141. data/test/mab_files/test_suborders/PAD01.001015067.PRIMO.xml +137 -0
  142. data/test/mab_files/test_suborders/PAD01.001015068.PRIMO.xml +216 -0
  143. data/test/mab_files/test_suborders/PAD01.001015070.PRIMO.xml +212 -0
  144. data/test/mab_files/test_suborders/PAD01.001108212.PRIMO.xml +259 -0
  145. data/test/mab_files/test_suborders/PAD01.001499877.PRIMO.xml +227 -0
  146. data/test/mab_files/test_suborders/PAD01.001499879.PRIMO.xml +255 -0
  147. data/test/mab_files/test_suborders/PAD01.001499880.PRIMO.xml +279 -0
  148. data/test/mab_files/test_suborders/PAD01.001562173.PRIMO.xml +116 -0
  149. data/test/mab_files/test_suborders/PAD01.001572048.PRIMO.xml +68 -0
  150. data/test/mab_files/test_suborders/PAD01.001572049.PRIMO.xml +133 -0
  151. data/test/mab_files/test_superorder/PAD01.000806191.PRIMO.xml +216 -0
  152. data/test/mab_files/test_superorder/PAD01.000844686.PRIMO.xml +584 -0
  153. data/test/mab_files/test_superorder/PAD01.001015067.PRIMO.xml +137 -0
  154. data/test/mab_files/test_superorder/PAD01.001452439.PRIMO.xml +377 -0
  155. data/test/mab_files/test_superorder_display/PAD01.000000872.PRIMO.xml +227 -0
  156. data/test/mab_files/test_superorder_display/PAD01.000160412.PRIMO.xml +518 -0
  157. data/test/mab_files/test_superorder_display/PAD01.000162669.PRIMO.xml +198 -0
  158. data/test/mab_files/test_superorder_display/PAD01.000178500.PRIMO.xml +158 -0
  159. data/test/mab_files/test_superorder_display/PAD01.000297043.PRIMO.xml +154 -0
  160. data/test/mab_files/test_superorder_display/PAD01.000562878.PRIMO.xml +1214 -0
  161. data/test/mab_files/test_superorder_display/PAD01.000958473.PRIMO.xml +379 -0
  162. data/test/mab_files/test_superorder_display/PAD01.001006945.PRIMO.xml +574 -0
  163. data/test/mab_files/test_superorders/PAD01.000057960.PRIMO.xml +1069 -0
  164. data/test/mab_files/test_superorders/PAD01.000215104.PRIMO.xml +191 -0
  165. data/test/mab_files/test_superorders/PAD01.000310864.PRIMO.xml +999 -0
  166. data/test/mab_files/test_superorders/PAD01.000392641.PRIMO.xml +4334 -0
  167. data/test/mab_files/test_superorders/PAD01.000438377.PRIMO.xml +232 -0
  168. data/test/mab_files/test_superorders/PAD01.000479391.PRIMO.xml +142 -0
  169. data/test/mab_files/test_superorders/PAD01.000637121.PRIMO.xml +805 -0
  170. data/test/mab_files/test_superorders/PAD01.001015067.PRIMO.xml +137 -0
  171. data/test/mab_files/test_superorders/PAD01.001499877.PRIMO.xml +227 -0
  172. data/test/mab_files/test_superorders/PAD01.001572048.PRIMO.xml +68 -0
  173. data/test/mab_files/test_title_display/PAD01.000954111.PRIMO.xml +162 -0
  174. data/test/mab_files/test_title_display/PAD01.000992332.PRIMO.xml +189 -0
  175. data/test/mab_files/test_title_display/PAD01.001015068.PRIMO.xml +216 -0
  176. data/test/mab_files/test_title_display/PAD01.001499879.PRIMO.xml +255 -0
  177. data/test/mab_files/test_title_search/test_1.xml +20 -0
  178. data/test/mab_files/test_title_sort/PAD01.000954111.PRIMO.xml +162 -0
  179. data/test/mab_files/test_title_sort/PAD01.000992332.PRIMO.xml +189 -0
  180. data/test/mab_files/test_volume_count_sort/PAD01.001015068.PRIMO.xml +216 -0
  181. data/test/mab_files/test_volume_count_sort/PAD01.001499879.PRIMO.xml +255 -0
  182. data/test/mabmapper/test_creation_date.rb +5 -0
  183. data/test/mabmapper/test_creationdate.rb +23 -0
  184. data/test/mabmapper/test_creator_contributor_facet.rb +4 -0
  185. data/test/mabmapper/test_description.rb +9 -0
  186. data/test/mabmapper/test_doc.rb +6 -0
  187. data/test/mabmapper/test_edition.rb +8 -0
  188. data/test/mabmapper/test_erscheinungsform.rb +5 -0
  189. data/test/mabmapper/test_ht_number.rb +4 -0
  190. data/test/mabmapper/test_inhaltstyp.rb +5 -0
  191. data/test/mabmapper/test_is_secondary_form.rb +7 -0
  192. data/test/mabmapper/test_is_suborder.rb +7 -0
  193. data/test/mabmapper/test_issn.rb +4 -0
  194. data/test/mabmapper/test_materialtyp.rb +5 -0
  195. data/test/mabmapper/test_notation_sort.rb +5 -0
  196. data/test/mabmapper/test_publisher.rb +5 -0
  197. data/test/mabmapper/test_redactional_remark.rb +4 -0
  198. data/test/mabmapper/test_relation.rb +16 -0
  199. data/test/mabmapper/test_secondary_form_creationdate.rb +6 -0
  200. data/test/mabmapper/test_secondary_form_isbn.rb +6 -0
  201. data/test/mabmapper/test_secondary_form_physical_description.rb +5 -0
  202. data/test/mabmapper/test_secondary_form_preliminary_phrase.rb +5 -0
  203. data/test/mabmapper/test_secondary_form_publisher.rb +5 -0
  204. data/test/mabmapper/test_secondary_form_superorder.rb +9 -0
  205. data/test/mabmapper/test_short_title_display.rb +27 -0
  206. data/test/mabmapper/test_signature.rb +12 -0
  207. data/test/mabmapper/test_signature_search.rb +12 -0
  208. data/test/mabmapper/test_status.rb +13 -0
  209. data/test/mabmapper/test_subject.rb +5 -0
  210. data/test/mabmapper/test_suborders.rb +192 -0
  211. data/test/mabmapper/test_superorder.rb +7 -0
  212. data/test/mabmapper/test_superorder_display.rb +22 -0
  213. data/test/mabmapper/test_superorders.rb +38 -0
  214. data/test/mabmapper/test_title_display.rb +12 -0
  215. data/test/mabmapper/test_title_search.rb +4 -0
  216. data/test/mabmapper/test_title_sort.rb +6 -0
  217. data/test/mabmapper/test_volume_count_sort.rb +5 -0
  218. data/test/test_helper.rb +53 -0
  219. data/test/test_mabmapper.rb +19 -0
  220. data/utils/mab_by_docid.sh +19 -0
  221. metadata +574 -0
@@ -0,0 +1,6 @@
1
+ # coding: utf-8
2
+ module TestSecondaryFormCreationdate # Expected secondary_form_creationdate per fixture doc id. define_field_test is defined outside this view (presumably test/test_helper.rb) — confirm its exact semantics there.
3
+ define_field_test '000806191', secondary_form_creationdate: nil
4
+ define_field_test '000844686', secondary_form_creationdate: '2001'
5
+ define_field_test '001452439', secondary_form_creationdate: '2012'
6
+ end
@@ -0,0 +1,6 @@
1
+ # coding: utf-8
2
+ module TestSecondaryFormIsbn # Expected secondary_form_isbn per fixture doc id. define_field_test is defined outside this view (presumably test/test_helper.rb) — confirm its exact semantics there.
3
+ define_field_test '000806191', secondary_form_isbn: '3-8288-0675-9'
4
+ define_field_test '000844686', secondary_form_isbn: '3-8288-1141-8'
5
+ define_field_test '001452439', secondary_form_isbn: nil
6
+ end
@@ -0,0 +1,5 @@
1
+ # coding: utf-8
2
+ module TestSecondaryFormPhysicalDescription # Expected secondary_form_physical_description per fixture doc id. define_field_test is defined outside this view (presumably test/test_helper.rb) — confirm its exact semantics there.
3
+ define_field_test '000806191', secondary_form_physical_description: '2 Mikrofiches : 24x'
4
+ define_field_test '001452439', secondary_form_physical_description: nil
5
+ end
@@ -0,0 +1,5 @@
1
+ # coding: utf-8
2
+ module TestSecondaryFormPreliminaryPhrase # Expected secondary_form_preliminary_phrase per fixture doc id. define_field_test is defined outside this view (presumably test/test_helper.rb) — confirm its exact semantics there.
3
+ define_field_test '000806191', secondary_form_preliminary_phrase: 'Mikrofiche-Ausg.:'
4
+ define_field_test '001452439', secondary_form_preliminary_phrase: 'Digitalisierte Ausg.'
5
+ end
@@ -0,0 +1,5 @@
1
+ # coding: utf-8
2
+ module TestSecondaryFormPublisher # Expected secondary_form_publisher per fixture doc id. define_field_test is defined outside this view (presumably test/test_helper.rb) — confirm its exact semantics there.
3
+ define_field_test '000806191', secondary_form_publisher: 'Marburg : Tectum-Verl.'
4
+ define_field_test '001452439', secondary_form_publisher: 'Paderborn : Universitätsbibliothek Paderborn'
5
+ end
@@ -0,0 +1,9 @@
1
+ # coding: utf-8
2
+ module TestSecondaryFormSuperorder # Expected secondary_form_superorder per fixture doc id: a single hash or an array of hashes with "ht_number", "label" and "volume_count" keys. define_field_test is defined outside this view — confirm its semantics there.
3
+ define_field_test '000806191', secondary_form_superorder: {"ht_number" => "HT006670284","label" => "Edition Wissenschaft : Reihe Chemie ; 240","volume_count" => "240"}
4
+ define_field_test '000977734', secondary_form_superorder: [
5
+ {"ht_number"=>nil, "label"=>"Zeitschriften der HAAB Weimar. Projekt Sicherungsverfilmung der HAAB Weimar", "volume_count"=>nil},
6
+ {"ht_number"=>nil, "label"=>"Faustsammlung der HAAB Weimar", "volume_count"=>nil}
7
+ ]
8
+ define_field_test '001452439', secondary_form_superorder: {"ht_number" => nil, "label" => "Digitale Sammlungen der Universitätsbibliothek Paderborn", "volume_count" => nil}
9
+ end
@@ -0,0 +1,27 @@
1
+ # coding: utf-8
2
+ module TestShortTitleDisplay # Expected short_title_display per fixture doc id. define_field_test is defined outside this view (presumably test/test_helper.rb) — confirm its exact semantics there.
3
+ define_field_test '000057960', short_title_display: 'Sämtliche Werke : historisch-kritische Ausgabe'
4
+ define_field_test '000058000', short_title_display: 'Tannhäuser'
5
+ define_field_test '000215104', short_title_display: 'Hiersemanns bibliographische Handbücher'
6
+ define_field_test '000310864', short_title_display: 'Paderborner Universitätsreden'
7
+ define_field_test '000392641', short_title_display: 'Hegel-Jahrbuch'
8
+ define_field_test '000392645', short_title_display: 'Hegel-Jahrbuch'
9
+ define_field_test '000438377', short_title_display: 'Urkundenregesten zur Tätigkeit des deutschen Königs- und Hofgerichts bis 1451'
10
+ define_field_test '000479391', short_title_display: 'Quellen und Forschungen zur höchsten Gerichtsbarkeit im alten Reich'
11
+ define_field_test '000637121', short_title_display: 'Zeitschrift für Familienforschung : ZfF'
12
+ define_field_test '000676616', short_title_display: 'Recht und Staat'
13
+ define_field_test '000782994', short_title_display: 'Rot'
14
+ define_field_test '001006945', short_title_display: 'Die Zeit Wenzels 1397 - 1400'
15
+ define_field_test '001015067', short_title_display: 'Forum Geschichte kompakt'
16
+ define_field_test '001015070', short_title_display: 'Vom Ende des Ersten Weltkriegs bis zur Gegenwart [Schülerbd.]'
17
+ define_field_test '001108212', short_title_display: 'Johann Nestroy - Dokumente'
18
+ define_field_test '001249043', short_title_display: 'Fractions, decimals, ratios, and percents' # replace [Hauptbd.] with name of superorder
19
+ define_field_test '001499877', short_title_display: 'Fakten und Fiktionen : Werklexikon der deutschsprachigen Schlüsselliteratur ; 1900 - 2010'
20
+ define_field_test '001499879', short_title_display: 'Andres bis Loest'
21
+ define_field_test '001499880', short_title_display: 'Heinrich Mann bis Zwerenz'
22
+ define_field_test '001510878', short_title_display: 'Tanzhaus' # short title would be '[Buch]', so take the superorder title
23
+ define_field_test '001562173', short_title_display: 'Schwerpunktthema: Türkische Familien in Deutschland - Generationenbeziehungen und Generationenperspektiven'
24
+ define_field_test '001568334', short_title_display: 'Software Engineering 2013' # short title would be 'Buch', so take the superorder title
25
+ define_field_test '001572048', short_title_display: 'Paderborner Rathaus-Vorlesungen'
26
+ define_field_test '001572049', short_title_display: 'Alte und Neue Welt : zur wechselvollen Geschichte transatlanitischer Kulturkontakte' # NOTE(review): 'transatlanitischer' looks misspelled but presumably mirrors the fixture record — confirm before changing
27
+ end
@@ -0,0 +1,12 @@
1
+ # coding: utf-8
2
+ module TestSignature # Expected signature (shelf mark) per fixture doc id. define_field_test is defined outside this view (presumably test/test_helper.rb) — confirm its exact semantics there.
3
+ define_field_test '000321365', signature: 'P10/34k12'
4
+ define_field_test '000636652', signature: 'P10/34m3'
5
+ define_field_test '000857994', signature: 'P10/34t26'
6
+ define_field_test '000859176', signature: 'KDVD1105'
7
+ define_field_test '000969442', signature: 'TWR12765'
8
+
9
+ # Signature with an appended volume count
10
+ define_field_test '000161445', signature: 'KXW4113-80/81'
11
+ define_field_test '001006945', signature: 'LKL2468-14'
12
+ end
@@ -0,0 +1,12 @@
1
+ # coding: utf-8
2
+ module TestSignatureSearch # Expected signature_search value(s) per fixture doc id; expectations are a single string or an array of strings. define_field_test is defined outside this view — confirm its semantics (e.g. whether array order matters) there.
3
+ define_field_test '000321365', signature_search: ["34K12", "P10/34K12"]
4
+ define_field_test '000636652', signature_search: ["P10/34M3", "34M3"]
5
+ define_field_test '000857994', signature_search: ["34T26", "P10/34T26"]
6
+ define_field_test '000859176', signature_search: "KDVD1105"
7
+ define_field_test '000969442', signature_search: ["TWR12765+4", "TWR12765", "TWR12765+1", "TWR12765+2", "TWR12765+3"]
8
+
9
+ # Signature with an appended volume count
10
+ define_field_test '000161445', signature_search: ["KXW4113-80/81", "KXW4113-80", "KXW4113"]
11
+ define_field_test '001006945', signature_search: ["LKL2468-14", "LKL2468"]
12
+ end
@@ -0,0 +1,13 @@
1
+ # coding: utf-8
2
+ module TestStatus # Expected status ('D' or 'A') per fixture doc id. define_field_test is defined outside this view (presumably test/test_helper.rb) — confirm its exact semantics there.
3
+ # deleted -> LDR position 6 == 'd'
4
+ define_field_test '000321365', status: 'D'
5
+
6
+ # withdrawn (weeded) via field 078
7
+ define_field_test '000392641', status: 'D'
8
+
9
+ # suppress records located in Detmold
10
+ define_field_test 'detmold_1', status: 'D'
11
+ define_field_test 'detmold_2', status: 'A'
12
+ define_field_test 'detmold_3', status: 'A'
13
+ end
@@ -0,0 +1,5 @@
1
+ # coding: utf-8
2
+ module TestSubject # Expected subject values per fixture doc id. define_field_test is defined outside this view (presumably test/test_helper.rb) — confirm its exact semantics there.
3
+ # << ... >> markers should be removed from the subject text
4
+ define_field_test '000972511', subject: ["Vischer, Friedrich Theodor von", "Faust"] # source value: Vischer, Friedrich Theodor <<von>>
5
+ end
@@ -0,0 +1,192 @@
1
+ # coding: utf-8
2
+ module TestSuborders # Tests for the superorder, is_suborder and superorder_display output of fixtures loaded from 'test_suborders'; each test derives its doc id from its own method name.
3
+ # case 1 (a <- b AND a <- c)
4
+ def test_001015068_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT015707971'); end
5
+ def test_001015068_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end
6
+ def test_001015068_has_superorder_display
7
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
8
+ ht_number: "HT015707971",
9
+ label: "Forum Geschichte kompakt",
10
+ volume_count: "Bd. 2, Teilbd. 1, Von der Frühen Neuzeit bis zum Ersten Weltkrieg [Schülerbd.]",
11
+ label_additions: nil
12
+ })
13
+ end
14
+
15
+ def test_001015070_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT015707971'); end
16
+ def test_001015070_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end
17
+ def test_001015070_has_superorder_display
18
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
19
+ ht_number: "HT015707971",
20
+ label: "Forum Geschichte kompakt",
21
+ volume_count: "Bd. 2, Teilbd. 2, Vom Ende des Ersten Weltkriegs bis zur Gegenwart [Schülerbd.]",
22
+ label_additions: nil
23
+ })
24
+ end
25
+
26
+ # case 2 (a <- b <- c)
27
+ def test_001499879_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT017055615'); end;
28
+ def test_001499879_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
29
+ def test_001499879_has_superorder_display
30
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
31
+ ht_number: "HT017055615",
32
+ label: "Fakten und Fiktionen",
33
+ volume_count: "1",
34
+ label_additions: nil
35
+ })
36
+ end
37
+
38
+ def test_001499880_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT017055615'); end;
39
+ def test_001499880_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
40
+ def test_001499880_has_superorder_display
41
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
42
+ ht_number: "HT017055615",
43
+ label: "Fakten und Fiktionen",
44
+ volume_count: "2",
45
+ label_additions: nil
46
+ })
47
+ end
48
+
49
+ def test_001499877_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT001260573'); end;
50
+ def test_001499877_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
51
+ def test_001499877_has_superorder_display
52
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
53
+ ht_number: "HT001260573",
54
+ label: "Hiersemanns bibliographische Handbücher",
55
+ volume_count: "21",
56
+ label_additions: nil
57
+ })
58
+ end
59
+
60
+ # case 3 (b <- a AND c <- a)
61
+ def test_001006945_has_superorders; assert_has_superorders(doc_id_from_method_name(__method__), ['HT002919097', 'HT003165994']); end;
62
+ def test_001006945_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
63
+ def test_001006945_has_superorder_display
64
+ assert_has_superorder_display_values(doc_id_from_method_name(__method__), [{
65
+ "ht_number" => "HT002919097",
66
+ "label" => "Urkundenregesten zur Tätigkeit des deutschen Königs- und Hofgerichts bis 1451",
67
+ "volume_count" => "14",
68
+ "label_additions" => nil
69
+ }, {
70
+ "ht_number" => "HT003165994",
71
+ "label" => "Quellen und Forschungen zur höchsten Gerichtsbarkeit im alten Reich",
72
+ "volume_count" => "14",
73
+ "label_additions" => ["Sonderreihe"]
74
+ }])
75
+ end
76
+
77
+ # case 4 (a <- b)
78
+ def test_001562173_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT005081594'); end;
79
+ def test_001562173_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
80
+ def test_001562173_has_superorder_display
81
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
82
+ "ht_number" => "HT005081594",
83
+ "label" => "Zeitschrift für Familienforschung",
84
+ "volume_count" => "25,1",
85
+ "label_additions" => nil
86
+ })
87
+ end
88
+
89
+ # case 5 (b <- a AND c <- a)
90
+ def test_001572049_has_superorders; assert_has_superorders(doc_id_from_method_name(__method__), ['HT002162049', 'HT017670666']); end;
91
+ def test_001572049_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
92
+ def test_001572049_has_superorder_display
93
+ assert_has_superorder_display_values(doc_id_from_method_name(__method__), [{
94
+ "ht_number" => "HT002162049",
95
+ "label" => "Paderborner Universitätsreden",
96
+ "volume_count" => "127",
97
+ "label_additions" => nil
98
+ }, {
99
+ "ht_number" => "HT017670666",
100
+ "label" => "Paderborner Rathaus-Vorlesungen",
101
+ "volume_count" => "1",
102
+ "label_additions" => nil
103
+ }])
104
+ end
105
+
106
+ # case 6 (a <- b AND a <- c)
107
+ def test_000676616_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT002672902'); end;
108
+ def test_000676616_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
109
+ def test_000676616_has_superorder_display
110
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
111
+ "ht_number" => "HT002672902",
112
+ "label" => "Hegel-Jahrbuch",
113
+ "volume_count" => "1993/94",
114
+ "label_additions" => nil
115
+ })
116
+ end
117
+
118
+ def test_000392645_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT002672902'); end;
119
+ def test_000392645_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
120
+ def test_000392645_has_superorder_display
121
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
122
+ "ht_number" => "HT002672902",
123
+ "label" => "Hegel-Jahrbuch",
124
+ "volume_count" => "1966",
125
+ "label_additions" => nil
126
+ })
127
+ end
128
+
129
+ # case 7 (a <- b AND a <- c)
130
+ def test_000058000_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT000289652'); end;
131
+ def test_000058000_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
132
+ def test_000058000_has_superorder_display
133
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
134
+ "ht_number" => "HT000289652",
135
+ "label" => "Sämtliche Werke",
136
+ "volume_count" => "Stücke 36",
137
+ "label_additions" => nil
138
+ })
139
+ end
140
+
141
+ def test_001108212_has_superorder; assert_has_superorder(doc_id_from_method_name(__method__), 'HT000289652'); end;
142
+ def test_001108212_is_suborder; assert_is_suborder(doc_id_from_method_name(__method__)); end;
143
+ def test_001108212_has_superorder_display
144
+ assert_has_superorder_display_value(doc_id_from_method_name(__method__), {
145
+ "ht_number" => "HT000289652",
146
+ "label" => "Sämtliche Werke / Johann Nestroy",
147
+ "volume_count" => "",
148
+ "label_additions" => nil
149
+ })
150
+ end
151
+
152
+ private
153
+
154
+ def assert_has_superorder(doc_id, superorder_id) # Single-id convenience wrapper: delegates to assert_has_superorders with a one-element array.
155
+ assert_has_superorders(doc_id, [superorder_id])
156
+ end
157
+
158
+ def assert_has_superorder_display_value(doc_id, superorder_display_values) # Single-hash convenience wrapper: delegates to assert_has_superorder_display_values with a one-element array.
159
+ assert_has_superorder_display_values(doc_id, [superorder_display_values])
160
+ end
161
+
162
+ def assert_has_superorder_display_values(doc_id, superorder_display_values) # Compares the i-th expected hash with the JSON-parsed text of the i-th superorder_display node.
163
+ doc = Nokogiri::XML(@engine.process('', load_mab('test_suborders', doc_id)).to_xml) # @engine and load_mab are defined outside this module (not visible here).
164
+ superorder_display_nodes = doc.css('superorder_display')
165
+
166
+ superorder_display_values.each_with_index do |superorder_display_value, index| # NOTE(review): node count is never compared — a missing node presumably yields nil here (NoMethodError on .text) and surplus nodes go unchecked.
167
+ assert_equal(superorder_display_value.stringify_keys, JSON.parse(superorder_display_nodes[index].text)) # stringify_keys is not core Ruby (presumably ActiveSupport); it lets expectations use symbol or string keys.
168
+ end
169
+ end
170
+
171
+ def assert_has_superorders(doc_id, superorder_ids) # Asserts the output has exactly superorder_ids.length superorder nodes and that each expected id appears among their texts.
172
+ doc = Nokogiri::XML(@engine.process('', load_mab('test_suborders', doc_id)).to_xml) # @engine and load_mab are defined outside this module (not visible here).
173
+ superorders = doc.css('superorder')
174
+
175
+ assert_equal(true, superorders.present?) # present? is not core Ruby — presumably ActiveSupport; confirm.
176
+ assert_equal(superorder_ids.length, superorders.length)
177
+
178
+ superorder_ids.each do |superorder_id|
179
+ assert_includes(superorders.map(&:text), superorder_id)
180
+ end
181
+ end
182
+
183
+ def assert_is_suborder(doc_id) # Asserts the text of the is_suborder node(s) is non-blank and equals 'true'.
184
+ doc = Nokogiri::XML(@engine.process('', load_mab('test_suborders', doc_id)).to_xml) # @engine and load_mab are defined outside this module (not visible here).
185
+ assert_equal(true, (value = doc.css('is_suborder').text).present?)
186
+ assert_equal('true', value)
187
+ end
188
+
189
+ def doc_id_from_method_name(method_name) # Returns the first run of digits in method_name, e.g. "001015068" for test_001015068_is_suborder (nil if there are none).
190
+ method_name[/\d+/]
191
+ end
192
+ end
@@ -0,0 +1,7 @@
1
+ # coding: utf-8
2
+ module TestSuperorder # Expected superorder id per fixture doc id. define_field_test is defined outside this view (presumably test/test_helper.rb) — confirm its exact semantics there.
3
+ # check for superorders of secondary forms (MAB fields 623 and 629)
4
+ define_field_test '000806191', superorder: 'HT006670284'
5
+ define_field_test '000844686', superorder: 'HT007082773'
6
+ define_field_test '001452439', superorder: nil
7
+ end
@@ -0,0 +1,22 @@
1
+ # coding: utf-8
2
module TestSuperorderDisplay
  # Builds one expected superorder_display hash (string keys, as produced by
  # parsing the field's JSON).
  display = lambda do |ht_number, label, volume_count, label_additions = nil|
    {
      "ht_number" => ht_number,
      "label" => label,
      "volume_count" => volume_count,
      "label_additions" => label_additions
    }
  end

  # superorder label with leading '...' but without ';' or ':' between dots and label
  define_field_test '000000872', superorder_display: [
    display.call("HT001310809", "Wissenschaftliche Tagung der Arbeitsgemeinschaft für Klinische Diätetik", "2"),
    display.call("HT002182783", "Aktuelle Ernährungsmedizin", "[3], Suppl")
  ]

  # for the label, everything after ':' should be removed
  define_field_test '000162669', superorder_display:
    display.call("HT001231518", "Handbuch der Dogmengeschichte", "Bd. 1, Das Dasein im Glauben ; Fasz. 4")
  define_field_test '000178500', superorder_display:
    display.call("HT001231518", "Handbuch der Dogmengeschichte", "Bd. 3, Christologie, Soteriologie, Ekklesiologie, Mariologie, Gnadenlehre ; Fasz.4")
  define_field_test '000297043', superorder_display:
    display.call("HT001231518", "Handbuch der Dogmengeschichte", "Bd. 4, Sakramente, Eschatologie ; Fasz. 1")

  # << ... >> should be removed
  define_field_test '000958473', superorder_display: [
    display.call("HT001231617", "Hessische Forschungen", "47"),
    display.call("HT003779625", "Die Geschichte unserer Heimat", "45")
  ]

  # label additions
  define_field_test '000160412', superorder_display:
    display.call("HT001237362", "Historische Zeitschrift", "[N.F.],1", ["Beiheft"])
  define_field_test '001006945', superorder_display: [
    display.call("HT002919097", "Urkundenregesten zur Tätigkeit des deutschen Königs- und Hofgerichts bis 1451", "14"),
    display.call("HT003165994", "Quellen und Forschungen zur höchsten Gerichtsbarkeit im alten Reich", "14", ["Sonderreihe"])
  ]

  # sometimes there are multiple 451 fields, one of them starting with '...';
  # if this is the case, try the other one
  define_field_test '000562878', superorder_display: [
    display.call("HT003809808", "Management & marketing dictionary", "1"),
    display.call("HT002100889", "dtv", "5815 : Beck-Wirtschaftsberater")
  ]
end
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
module TestSuperorders
  # Document ids expected to be flagged as superorders, grouped by the case
  # they cover. One test_<id>_is_superorder method is generated per id.
  [
    %w[001015067],           # case 1
    %w[001499877 000215104], # case 2
    %w[000438377 000479391], # case 3
    %w[000637121],           # case 4
    %w[000310864 001572048], # case 5
    %w[000392641],           # case 6
    %w[000057960]            # case 7
  ].flatten.each do |doc_id|
    test_name = "test_#{doc_id}_is_superorder"

    define_method(test_name) do
      assert_is_superorder(doc_id_from_method_name(test_name))
    end
  end

  private

  # Processes the 'test_superorders' fixture for +doc_id+ and asserts that the
  # text of its <is_superorder> element is present and equal to 'true'.
  def assert_is_superorder(doc_id)
    xml = @engine.process('', load_mab('test_superorders', doc_id)).to_xml
    flag = Nokogiri::XML(xml).css('is_superorder').text

    assert_equal(true, flag.present?)
    assert_equal('true', flag)
  end

  # Returns the first run of digits in +method_name+, i.e. the document id
  # embedded in a test method name.
  def doc_id_from_method_name(method_name)
    method_name.slice(/\d+/)
  end
end
@@ -0,0 +1,12 @@
1
+ # coding: utf-8
2
module TestTitleDisplay
  # Document id mapped to the expected title_display value.
  {
    # should insert 'Bd.'
    '001499879' => 'Fakten und Fiktionen : Werklexikon der deutschsprachigen Schlüsselliteratur ; 1900 - 2010. Bd. 1. Andres bis Loest',

    # should not insert 'Bd.'
    '001015068' => 'Forum Geschichte kompakt. Bd. 2, Teilbd. 1, Von der Frühen Neuzeit bis zum Ersten Weltkrieg [Schülerbd.]',

    # '<<' and '>>' should be removed
    '000954111' => 'Kitakantō-igaku = The Kitakanto medical journal',
    '000992332' => 'Der Hexenbürgermeister von Lemgo : ein Lesedrama in Versen'
  }.each do |doc_id, expected_title|
    define_field_test doc_id, title_display: expected_title
  end
end
@@ -0,0 +1,4 @@
1
+ # coding: utf-8
2
module TestTitleSearch
  # The 'test_1' fixture is expected to yield these three title values, in order.
  define_field_test('test_1', title: %w[AAA XXX YYY])
end
@@ -0,0 +1,6 @@
1
+ # coding: utf-8
2
module TestTitleSort
  # << ... >> should be removed; document id mapped to the expected title_sort value.
  {
    '000954111' => 'Kitakantō-igaku = Kitakanto medical journal',
    '000992332' => 'Hexenbürgermeister von Lemgo : ein Lesedrama in Versen'
  }.each do |doc_id, expected_sort_title|
    define_field_test doc_id, title_sort: expected_sort_title
  end
end
@@ -0,0 +1,5 @@
1
+ # coding: utf-8
2
module TestVolumeCountSort
  # Document id mapped to the expected volume_count_sort value.
  {
    '001015068' => '0002,x,1,schuel',
    '001499879' => '000000000000001'
  }.each do |doc_id, expected_sort_key|
    define_field_test doc_id, volume_count_sort: expected_sort_key
  end
end
@@ -0,0 +1,53 @@
1
+ # doesn't feel right to mess with $LOAD_PATH, but cannot require mabmapper without it
2
+ $LOAD_PATH.unshift(File.expand_path('../../lib', __FILE__))
3
+
4
+ # silence that minitest related error about minitest/autorun (since minitest version >= 5)
5
+ gem 'minitest'
6
+
7
+ require 'mabmapper'
8
+ require 'minitest/autorun'
9
+ require 'nokogiri'
10
+ require 'pry'
11
+
12
+ # Extending Ruby's Module class simplifies tests massively
13
class Module
  # Defines a test method named test_<field>_for_<doc_id> on the receiving
  # module. The field name is derived from the module's own name: a leading
  # 'Test' is stripped and the rest is underscored (TestTitleSort -> :title_sort).
  # The expected value is the first value of +assertion_options+; its key is
  # not evaluated.
  def define_field_test(doc_id, assertion_options)
    expected = assertion_options.values.first
    field = to_s.sub(/\ATest/, '').underscore.to_sym

    define_method("test_#{field}_for_#{doc_id}") do
      assert_doc_field_value_equal(doc_id, field, expected)
    end
  end
end
23
+
24
# Requires the mapping engine file and returns a new instance of the class
# whose name is derived from that file path (via classify/constantize).
#
# If the engine file cannot be loaded, the reason is written to stderr and the
# process exits with status 1.
def load_engine!
  engine_file = "mabmapper/aleph_mab_xml_engine" # TODO: Make me configurable
  require engine_file
  engine_file.classify.constantize.new
rescue LoadError => e
  # Keep the exit status callers may rely on, but say why we are bailing out.
  warn "Could not load mapping engine '#{engine_file}': #{e.message}"
  exit 1
end
33
+
34
# Processes the fixture "test_<field_name>/<doc_id>" with the engine and
# compares the resulting <field_name> elements against +expected_value+.
#
# Element texts that look like a JSON object ('{' ... '}') are parsed first.
# The actual value is nil when no element exists, the single value (or nil if
# it is blank) when exactly one exists, and an array of all values otherwise.
#
# An expected nil is checked with assert_nil, because passing nil as the
# expected value to assert_equal is deprecated in Minitest 5.
def assert_doc_field_value_equal(doc_id, field_name, expected_value)
  doc = Nokogiri::XML(@engine.process('', load_mab("test_#{field_name}", doc_id)).to_xml)

  field_values = doc.css(field_name.to_s).map do |node|
    text = node.text
    (text.start_with?('{') && text.end_with?('}')) ? JSON.parse(text) : text
  end

  actual_value =
    case field_values.length
    when 0 then nil
    when 1 then field_values.first.presence
    else field_values
    end

  if expected_value.nil?
    assert_nil actual_value
  else
    assert_equal expected_value, actual_value
  end
end
46
+
47
# Returns the content of the MAB fixture for +record_id+ inside
# mab_files/<directory_name> (resolved relative to this file). Two file names
# are tried in order: "PAD01.<record_id>.PRIMO.xml" and "<record_id>.xml".
#
# Raises Errno::ENOENT listing the tried paths when neither file exists,
# instead of failing inside File.read with a nil path.
def load_mab(directory_name, record_id)
  candidates = [
    "../mab_files/#{directory_name}/PAD01.#{record_id}.PRIMO.xml",
    "../mab_files/#{directory_name}/#{record_id}.xml"
  ].map { |path| File.expand_path(path, __FILE__) }

  mab_file_name = candidates.find { |path| File.exist?(path) }
  raise Errno::ENOENT, "MAB fixture for record #{record_id} (tried: #{candidates.join(', ')})" if mab_file_name.nil?

  File.read(mab_file_name)
end
@@ -0,0 +1,19 @@
1
+ require File.expand_path '../test_helper.rb', __FILE__
2
+
3
# Test case that hosts all mabmapper test modules. Uses Minitest::Test when it
# is available (Minitest 5) and falls back to the older
# MiniTest::Unit::TestCase otherwise.
class TestMabmapper < (defined?(Minitest::Test) ? Minitest::Test : MiniTest::Unit::TestCase)
  # Provides a fresh engine instance in @engine for every test.
  def setup
    @engine = load_engine!
  end
end
8
+
9
# Optional whitespace-separated list of test module names (without the
# 'test_' prefix) taken from the TESTS environment variable; nil means "all".
tests_to_run = ENV['TESTS'] && ENV['TESTS'].split

# require and include all selected mabmapper test modules
test_module_pattern = File.expand_path('../mabmapper/test_*.rb', __FILE__)

Dir.glob(test_module_pattern).each do |filename|
  module_file = File.basename(filename, '.rb')
  selected = tests_to_run.nil? || tests_to_run.any? { |name_of_test| "test_#{name_of_test}" == module_file }
  next unless selected

  require filename
  TestMabmapper.instance_eval do
    include module_file.camelize.constantize
  end
end
@@ -0,0 +1,19 @@
1
#!/bin/bash
#
# Extracts a single mab/mabmapper xml file from a numbered archive
#
# Usage:
#   ./mab_by_docid.sh 000636652  (call this from within the directory where the archives are located)
#
# Dependencies: bash, sed, find, head, tar, xmllint (from libxml)

if [ -z "${1:-}" ]; then
  echo "Usage: $0 <docid>" >&2
  exit 1
fi

docid_with_zeros=$1

# strip leading zeros so the id is not interpreted as an octal number below
docid_without_zeros="$(echo "$docid_with_zeros" | sed 's/^0*//')"

# an id consisting only of zeros becomes empty after stripping; treat it as 0
archive_number="$(( ${docid_without_zeros:-0} / 50000 + 1 ))"

# take the first match in case several archives carry the same number
archive_file_name=$(find . -name "aleph.PRIMO.*.$archive_number.tar.gz" | head -n 1)
mab_file_name="PAD01.$docid_with_zeros.PRIMO.xml"

if [ -z "$archive_file_name" ]; then
  echo "No archive matching aleph.PRIMO.*.$archive_number.tar.gz found below $(pwd)" >&2
  exit 1
fi

# grab the file from the tar.gz
tar -xvzf "$archive_file_name" "$mab_file_name" || exit 1

# format output using xmllint
xmllint --format "$mab_file_name" --output "$mab_file_name"