mabmapper 1.0.0.pre15 → 1.0.0.pre16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/lib/mabmapper/aleph_mab_xml_engine.rb +244 -53
  4. data/lib/mabmapper/cli.rb +0 -1
  5. data/lib/mabmapper/engine.rb +2 -0
  6. data/lib/mabmapper/mab_xml/document.rb +2 -0
  7. data/lib/mabmapper/mab_xml/query_helper.rb +1 -1
  8. data/lib/mabmapper/version.rb +1 -1
  9. data/mabmapper.gemspec +4 -3
  10. data/test/mab_files/test_author_statement/PAD01.000307083.PRIMO.xml +529 -0
  11. data/test/mab_files/test_author_statement/PAD01.000317603.PRIMO.xml +396 -0
  12. data/test/mab_files/test_author_statement/PAD01.000323362.PRIMO.xml +313 -0
  13. data/test/mab_files/test_author_statement/PAD01.001100110.PRIMO.xml +139 -0
  14. data/test/mab_files/test_corporate_body_contributor_display/PAD01.000000003.PRIMO.xml +171 -0
  15. data/test/mab_files/test_corporate_body_contributor_display/PAD01.000017278.PRIMO.xml +517 -0
  16. data/test/mab_files/test_corporate_body_contributor_display/PAD01.000023677.PRIMO.xml +181 -0
  17. data/test/mab_files/test_corporate_body_contributor_display/PAD01.000301195.PRIMO.xml +130 -0
  18. data/test/mab_files/test_corporate_body_contributor_display/PAD01.000335160.PRIMO.xml +4268 -0
  19. data/test/mab_files/test_corporate_body_creator_display/PAD01.000017864.PRIMO.xml +1931 -0
  20. data/test/mab_files/test_corporate_body_creator_display/PAD01.000102057.PRIMO.xml +137 -0
  21. data/test/mab_files/test_corporate_body_creator_display/PAD01.000129575.PRIMO.xml +105 -0
  22. data/test/mab_files/test_corporate_body_creator_display/PAD01.000143976.PRIMO.xml +161 -0
  23. data/test/mab_files/test_corporate_body_creator_display/PAD01.000147985.PRIMO.xml +925 -0
  24. data/test/mab_files/test_description/PAD01.001510737.PRIMO.xml +317 -0
  25. data/test/mab_files/test_local_comment/PAD01.001212891.PRIMO.xml +124 -0
  26. data/test/mab_files/test_local_comment/PAD01.001572143.PRIMO.xml +311 -0
  27. data/test/mab_files/test_person_contributor_display/PAD01.000322492.PRIMO.xml +120 -0
  28. data/test/mab_files/test_person_contributor_display/PAD01.001494977.PRIMO.xml +192 -0
  29. data/test/mab_files/test_person_contributor_display/PAD01.001495118.PRIMO.xml +480 -0
  30. data/test/mab_files/test_person_creator_display/PAD01.000007018.PRIMO.xml +216 -0
  31. data/test/mab_files/test_person_creator_display/PAD01.000135071.PRIMO.xml +195 -0
  32. data/test/mab_files/test_relation/PAD01.000968502.PRIMO.xml +164 -0
  33. data/test/mab_files/test_short_title_display/PAD01.000452919.PRIMO.xml +549 -0
  34. data/test/mab_files/test_short_title_display/PAD01.000558925.PRIMO.xml +527 -0
  35. data/test/mab_files/test_short_title_display/PAD01.000605735.PRIMO.xml +653 -0
  36. data/test/mab_files/test_short_title_display/PAD01.000695045.PRIMO.xml +600 -0
  37. data/test/mab_files/test_short_title_display/PAD01.000897969.PRIMO.xml +726 -0
  38. data/test/mab_files/test_short_title_display/PAD01.000998195.PRIMO.xml +463 -0
  39. data/test/mab_files/test_short_title_display/PAD01.001209722.PRIMO.xml +163 -0
  40. data/test/mab_files/test_short_title_display/PAD01.001209723.PRIMO.xml +180 -0
  41. data/test/mab_files/test_signature/PAD01.000318290.PRIMO.xml +1635 -0
  42. data/test/mab_files/test_signature/PAD01.000695094.PRIMO.xml +342 -0
  43. data/test/mab_files/test_signature/PAD01.000765779.PRIMO.xml +437 -0
  44. data/test/mab_files/test_signature/PAD01.000869906.PRIMO.xml +128 -0
  45. data/test/mab_files/test_signature/PAD01.000897969.PRIMO.xml +726 -0
  46. data/test/mab_files/test_signature/PAD01.000998195.PRIMO.xml +463 -0
  47. data/test/mab_files/test_signature/PAD01.001414237.PRIMO.xml +162 -0
  48. data/test/mab_files/test_signature/PAD01.001518782.PRIMO.xml +136 -0
  49. data/test/mab_files/test_signature_search/PAD01.000318290.PRIMO.xml +1635 -0
  50. data/test/mab_files/test_signature_search/PAD01.000452919.PRIMO.xml +549 -0
  51. data/test/mab_files/test_signature_search/PAD01.000695094.PRIMO.xml +342 -0
  52. data/test/mab_files/test_signature_search/PAD01.000765779.PRIMO.xml +437 -0
  53. data/test/mab_files/test_signature_search/PAD01.000869906.PRIMO.xml +128 -0
  54. data/test/mab_files/test_signature_search/PAD01.000897969.PRIMO.xml +726 -0
  55. data/test/mab_files/test_signature_search/PAD01.000998195.PRIMO.xml +463 -0
  56. data/test/mab_files/test_signature_search/PAD01.001414237.PRIMO.xml +162 -0
  57. data/test/mab_files/test_signature_search/PAD01.001518782.PRIMO.xml +136 -0
  58. data/test/mab_files/test_status/PAD01.000898036.PRIMO.xml +43 -0
  59. data/test/mab_files/test_superorder_display/PAD01.001559463.PRIMO.xml +124 -0
  60. data/test/mab_files/test_title_search/PAD01.000253702.PRIMO.xml +795 -0
  61. data/test/mab_files/test_title_search/PAD01.001584494.PRIMO.xml +2163 -0
  62. data/test/mabmapper/test_author_statement.rb +14 -0
  63. data/test/mabmapper/test_corporate_body_contributor_display.rb +10 -0
  64. data/test/mabmapper/test_corporate_body_creator_display.rb +27 -0
  65. data/test/mabmapper/test_description.rb +3 -0
  66. data/test/mabmapper/test_doc.rb +4 -3
  67. data/test/mabmapper/test_local_comment.rb +5 -0
  68. data/test/mabmapper/test_person_contributor_display.rb +6 -0
  69. data/test/mabmapper/test_person_creator_display.rb +5 -0
  70. data/test/mabmapper/test_relation.rb +3 -0
  71. data/test/mabmapper/test_short_title_display.rb +18 -1
  72. data/test/mabmapper/test_signature.rb +16 -0
  73. data/test/mabmapper/test_signature_search.rb +20 -4
  74. data/test/mabmapper/test_status.rb +3 -0
  75. data/test/mabmapper/test_superorder_display.rb +3 -0
  76. data/test/mabmapper/test_title_search.rb +35 -0
  77. metadata +154 -23
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: edfa34f2b352b77bf53e7f581e3d60e8c59c84bf
4
- data.tar.gz: 4dad71a39bddd155c2c3595a0995278a882e67a1
3
+ metadata.gz: c386816c140e51c20adc3f14f7a9e3366f5c0fe8
4
+ data.tar.gz: 1712293a68164eea826a58fe020ffb717d130411
5
5
  SHA512:
6
- metadata.gz: af04565ff5c6bd6fa406a124b865fd412353e3bae166cb798054cf79ec66e9632416269753539a5f3d2201f0c9831e30c702f31416d481efc6bde4392c3ecb7b
7
- data.tar.gz: 8445b5bc982ee1254c110cf30b93508703c11b4d1f2d4dc6868b061fc7fd6bf86dffb18144d667d041c35940c35f06d36dc41ce5e960dfa474816b67e7701661
6
+ metadata.gz: c8fbde45856cb171d4a762f477a019ae6a1958e7061ed0511f47ab08d977679d13d599f8166f049bdf2e53f51f13ac1bc02e69d0de32e84babab971429a317be
7
+ data.tar.gz: 761f5ed07daec64d18e6d53aac08f6b886026600cdca9762569b42933288d25d43f0fcbf03bb9bccbbcee509e521fcd0c2f2768e798be0b8f4f4fe8358be0102
data/CHANGELOG.md ADDED
@@ -0,0 +1,13 @@
1
+ Changelog
2
+ =========
3
+
4
+ * [2013-12-19] Added 334 to short_title_display
5
+ * [2013-12-18] Take all aleph expanded 200er fields and their order into account for signature
6
+ * [2013-11-29] Do not remove superorder_display entries without valid ht_number, just check for label existence
7
+ * [2013-11-29] Added author_statement field (mab 359)
8
+ * [2013-11-29] Added person_creator_display field
9
+ * [2013-11-29] Added person_contributor_display field
10
+ * [2013-11-29] Added corporate_body_creator_display field
11
+ * [2013-11-29] Added corporate_body_contributor_display field
12
+ * [2013-11-27] For short_title_display, take mab 310 into account
13
+ * [2013-11-27] Fixed handling of ind1: ['-', ...] (the '-' caused trouble)
@@ -24,6 +24,8 @@ module Mabmapper
24
24
  # Standort Detmold unterdrücken
25
25
  detmold_locations = doc.field('LOC').subfield('n').get.values.flatten
26
26
  value = 'D' if detmold_locations.present? && detmold_locations.all?{|v| v == '50'}
27
+ # Interimsaufnahmen unterdrücken
28
+ value = 'D' if doc.field('537', ind1: '-', ind2: '1').subfield('a').get.values.flatten.any? { |v| v.downcase.include? 'interimsaufnahme' }
27
29
 
28
30
  value
29
31
  end
@@ -161,14 +163,21 @@ module Mabmapper
161
163
  end
162
164
 
163
165
  field :short_title_display do
164
- # 331 => Titel, 335 Titelzusatz (ind2 == 1 -> betrachteter Titel, ind2 == 2 -> Überordnung)
165
- f089_1 = doc.field('089', ind2: '1').subfield('a').get.value
166
- f331_1 = doc.field('331', ind2: '1').subfield('a').get.value
167
- f331_2 = doc.field('331', ind2: '2').subfield('a').get.value
168
- f335_1 = doc.field('335', ind2: '1').subfield('a').get.value
169
-
170
- short_title = if (f331_1 && f335_1)
166
+ f089_1 = doc.field('089', ind2: '1').subfield('a').get.value # Bandangabe in Vorlageform
167
+ f310_1 = doc.field('310', ind1: ['-', 'a'], ind2: '1').subfield('a').get.value # Hauptsachtitel in Ansetzungsform
168
+ f331_1 = doc.field('331', ind2: '1').subfield('a').get.value # Hauptsachtitel in Vorlageform oder Mischform
169
+ f331_2 = doc.field('331', ind2: '2').subfield('a').get.value #
170
+ f334_1 = doc.field('334', ind1: '-', ind2: '1').get.value # Allgemeine Materialbenennung
171
+ f335_1 = doc.field('335', ind2: '1').subfield('a').get.value # Zusätze zum Hauptsachtitel
172
+
173
+ short_title = if f310_1
174
+ f310_1
175
+ elsif (f331_1 && f334_1 && f335_1)
176
+ "#{f331_1} [#{f334_1}] : #{f335_1}"
177
+ elsif (f331_1 && f335_1)
171
178
  "#{f331_1} : #{f335_1}"
179
+ elsif f331_1 && f334_1
180
+ "#{f331_1} [#{f334_1}]"
172
181
  elsif f331_1
173
182
  f331_1
174
183
  elsif f089_1 && f089_1.length > 3 && f089_1[/\A(\d|\s)+\Z/].nil? && !['buch', 'hauptbd.'].include?(f089_1.gsub(/\[|\]/, '').downcase)
@@ -209,13 +218,115 @@ module Mabmapper
209
218
  search_titles << doc.field('627', ind2: '1').subfield('a').get.values
210
219
  search_titles << doc.field('633', ind2: '1').subfield('a').get.values
211
220
 
212
- search_titles.flatten.map(&:presence).compact.uniq
221
+ search_titles
222
+ .flatten
223
+ .compact
224
+ .map do |search_title|
225
+ # add entries without dashes in words, if words with dashes are present
226
+ if search_title.match(/[A-ZÄÖÜ][a-zäöü]+\-[A-ZÄÖÜ][a-zäöü]+/)
227
+ [search_title, search_title.split(' ').map { |string| string.gsub(/([A-ZÄÖÜ][a-zäöü]+)\-([A-ZÄÖÜ][a-zäöü]+)/, '\1\2').downcase.capitalize }.join(' ')]
228
+ else
229
+ search_title
230
+ end
231
+ end
232
+ .flatten
233
+ .map(&:presence).compact.uniq
234
+ end
235
+
236
+
237
+ #
238
+ # creator
239
+ #
240
+ field :person_creator_display do # person names taken from fields 100..196 whose ind1 is '-' or 'a' and ind2 is '1' or '2'
241
+ creators = []
242
+
243
+ # Persons: every fourth field number from 100 up to 196
244
+ (100..196).step(4) do |f|
245
+ doc.field("#{f}", ind1: ['-', 'a'], ind2: ['1', '2']).get.fields.each do |field|
246
+ creators << (field.get_subfield('a').try(:value) || field.get_subfield('p').try(:value) || field.get_subfield('c').try(:value)) # first non-nil value among subfields a, p, c
247
+ end
248
+ end
249
+
250
+ creators.map(&:presence).compact.uniq # drop blank entries and duplicates
251
+ end
252
+
253
+ def self.corporate_body_from_field(field)
254
+ subfield_a = field.get_subfield('a').try(:value) # Körperschafts-/Kongressname/Geografikum ohne IDN-Verknüpfung (NW)
255
+ subfield_b = field.get_subfield('b').try(:value) # Unterordnung
256
+ subfield_c = field.get_subfield('c').try(:value) # Ort (NW)
257
+ subfield_d = field.get_subfield('d').try(:value) # Datum (NW)
258
+ subfield_e = field.get_subfield('e').try(:value) # Kongressname (NW)
259
+ subfield_g = field.get_subfield('g').try(:value) # Name des Geografikums (NW)
260
+ subfield_h = field.get_subfield('h').try(:value) # Zusatz
261
+ subfield_k = field.get_subfield('k').try(:value) # Körperschaftsname (NW)
262
+ subfield_n = field.get_subfield('n').try(:value) # Zählung (W)
263
+ subfield_x = field.get_subfield('x').try(:value) # nachgeordneter Teil (W)
264
+ subfield_z = field.get_subfield('x').try(:value) # geografische Unterteilung (W)
265
+
266
+ if !subfield_a && subfield_b && !subfield_c && !subfield_e && !subfield_g && subfield_h && subfield_k && !subfield_x && !subfield_z
267
+ "#{subfield_k} <#{subfield_h}> / #{subfield_b}"
268
+ else
269
+ [
270
+ subfield_a,
271
+ subfield_k,
272
+ subfield_e,
273
+ subfield_g,
274
+ subfield_b ? "/ #{subfield_b}" : nil,
275
+ "<#{[subfield_h, subfield_n, subfield_d, subfield_c, subfield_x, subfield_z].compact.join(', ').presence}>",
276
+ ].compact.join(' ').try(:sub, '<>', '').try(:strip)
277
+ end
278
+ end
279
+
280
+ field :corporate_body_creator_display do # corporate bodies taken from fields 200..296 whose ind1 is '-' or 'a' and ind2 is '1' or '2'
281
+ creators = []
282
+
283
+ # Corporate bodies: every fourth field number from 200 up to 296
284
+ (200..296).step(4) do |f|
285
+ doc.field("#{f}", ind1: ['-', 'a'], ind2: ['1', '2']).get.fields.each do |field|
286
+ creators << engine.corporate_body_from_field(field) # display string is built by the helper reached through engine
287
+ end
288
+ end
289
+
290
+ creators.map(&:presence).compact.uniq # drop blank entries and duplicates
213
291
  end
214
292
 
293
+ field :author_statement do # subfield a values of field 359 (ind1 '-'); ind2 '1' is preferred, ind2 '2' is the fallback
294
+ f359_1 = doc.field('359', ind1:"-", ind2:'1').subfield('a').get.values.flatten.presence
295
+ f359_2 = doc.field('359', ind1:"-", ind2:'2').subfield('a').get.values.flatten.presence
296
+ f359_1 || f359_2 # .presence turns an empty list into nil, so the ind2 '2' values are used when ind2 '1' has none
297
+ end
215
298
 
216
299
  #
217
- # Creator
300
+ # contributor
218
301
  #
302
+ field :person_contributor_display do # person names taken from fields 100..196 whose ind1 is 'b', 'c', 'e' or 'f' and ind2 is '1' or '2'
303
+ contributors = []
304
+
305
+ # Persons: every fourth field number from 100 up to 196
306
+ (100..196).step(4) do |f|
307
+ doc.field("#{f}", ind1: ['b', 'c', 'e', 'f'], ind2: ['1', '2']).get.fields.each do |field|
308
+ name = (field.get_subfield('a').try(:value) || field.get_subfield('p').try(:value) || field.get_subfield('c').try(:value)) # first non-nil value among subfields a, p, c
309
+ action_designator = field.get_subfield('b').try(:value) # subfield b, appended after the name when present
310
+ contributors << [name, action_designator].map(&:presence).compact.join(' ') # blank parts are left out before joining
311
+ end
312
+ end
313
+
314
+ contributors.map(&:presence).compact.uniq # drop blank entries and duplicates
315
+ end
316
+
317
+ field :corporate_body_contributor_display do # corporate bodies taken from fields 200..296 whose ind1 is 'b', 'c', 'e' or 'f' and ind2 is '1' or '2'
318
+ contributors = []
319
+
320
+ # Corporate bodies: every fourth field number from 200 up to 296
321
+ (200..296).step(4) do |f|
322
+ doc.field("#{f}", ind1: ['b', 'c', 'e', 'f'], ind2: ['1', '2']).get.fields.each do |field|
323
+ contributors << engine.corporate_body_from_field(field) # display string is built by the helper reached through engine
324
+ end
325
+ end
326
+
327
+ contributors.map(&:presence).compact.uniq # drop blank entries and duplicates
328
+ end
329
+
219
330
  field :creator_contributor_display do
220
331
  creators = []
221
332
 
@@ -491,33 +602,6 @@ module Mabmapper
491
602
  ddc_fields.flatten.map(&:presence).compact.uniq
492
603
  end
493
604
 
494
- #
495
- # Description
496
- #
497
- field :description do
498
- descriptions = []
499
-
500
- # 405 - Erscheinungsverlauf von Zeitschriften
501
- descriptions << doc.field('405', ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
502
-
503
- # 522 - Teilungsvermerk bei fortlaufenden Sammelwerken
504
- descriptions << doc.field('522', ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
505
-
506
- # 523 - Erscheinungsverlauf von Monos
507
- descriptions << doc.field('523', ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
508
-
509
- (501..519).each do |f|
510
- descriptions << doc.field("#{f}", ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
511
- end
512
-
513
- (536..537).each do |f|
514
- descriptions << doc.field("#{f}", ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
515
- end
516
-
517
- # Finally...
518
- descriptions.flatten.map(&:presence).compact.uniq
519
- end
520
-
521
605
  #
522
606
  # Abstracts
523
607
  #
@@ -553,6 +637,13 @@ module Mabmapper
553
637
  }
554
638
  end
555
639
 
640
+ if (f022a = doc.field('022').subfield('a').get.value).present?
641
+ relations << {
642
+ ht_number: f022a,
643
+ label: 'Sekundärform'
644
+ }
645
+ end
646
+
556
647
  (526..534).each do |mab_field_number|
557
648
  doc.field("#{mab_field_number}", ind2: '1').get.fields.each do |field|
558
649
  ht_number = field.get_subfield('9').try(:value).presence
@@ -592,7 +683,7 @@ module Mabmapper
592
683
 
593
684
  superorders
594
685
  .map(&:presence)
595
- .delete_if { |element| element[:ht_number].blank? }
686
+ .delete_if { |element| element[:label].blank? }
596
687
  .each do |element|
597
688
  # remove 'not sort' indicators from label
598
689
  element[:label].try(:gsub!, /<<|>>/, '')
@@ -705,6 +796,33 @@ module Mabmapper
705
796
  type
706
797
  end
707
798
 
799
+ #
800
+ # Description
801
+ #
802
+ field :description do # collects subfield a/p values (joined with ': ') from the note fields listed below
803
+ descriptions = []
804
+
805
+ # 405 - publication history of journals
806
+ descriptions << doc.field('405', ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
807
+
808
+ # 522 - note on the splitting of serial collective works
809
+ descriptions << doc.field('522', ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
810
+
811
+ # 523 - publication history of monographs
812
+ descriptions << doc.field('523', ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
813
+
814
+ (501..519).each do |f| # fields 501 through 519
815
+ descriptions << doc.field("#{f}", ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
816
+ end
817
+
818
+ (536..537).each do |f| # field 537 is left out when ref(:erscheinungsform) is "journal"
819
+ descriptions << doc.field("#{f}", ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ') unless f == 537 && ref(:erscheinungsform) == "journal"
820
+ end
821
+
822
+ # Finally: flatten the collected lists, drop blank entries and duplicates
823
+ descriptions.flatten.map(&:presence).compact.uniq
824
+ end
825
+
708
826
  #
709
827
  # Delivery Category
710
828
  #
@@ -733,7 +851,7 @@ module Mabmapper
733
851
  # Notation
734
852
  #
735
853
  field :notation do
736
- doc.field('700', ind2: ' ').subfield('a').get.values(join_subfields: true)
854
+ doc.field('700', ind2: ' ').subfield('a').get.values(join_subfields: '')
737
855
  end
738
856
 
739
857
  field :notation_sort do
@@ -761,7 +879,16 @@ module Mabmapper
761
879
  all_stack = fields.map{|f| f.subfields.find {|sf| sf.name == 'b' && sf.value.match(/02|03|04|07/)}.present?}.all?
762
880
 
763
881
  # Zeitschriftensignatur (haben Vorrang, falls vorhanden)
764
- signatures << doc.field('200', ind2: ' ').subfield('f').get.value.try(:gsub, ' ', '')
882
+ #
883
+ # Achtung, bei Feld 200 handelt es sich um einen Aleph-Expand. Dieses Feld ist an den beiden leeren Indikatoren zu erkennen.
884
+ # Darüber hinaus kann dieses Feld mehrfach vorkommen. Wir nehmen an, dass Subfeld 0 eine Art Zählung angibt, weshalb dort
885
+ # ein Wert von '1' zu bevorzugen ist.
886
+ #
887
+ signatures << doc.field('200', ind1: ' ', ind2: ' ').get.fields
888
+ .select { |f| f.get_subfield('f').present? }
889
+ .select { |f| (value = f.get_subfield('0').try(:value)) == '1' || value.nil? }
890
+ .map { |f| f.get_subfield('f').value.try(:gsub, ' ', '') }
891
+ .first.presence
765
892
 
766
893
  # Wenn alle Exemplare im Magazin stehen, dann nimm nur die erste Signatur
767
894
  if all_stack
@@ -803,6 +930,27 @@ module Mabmapper
803
930
  # Stücktitel Signatur
804
931
  signatures << doc.field('100', ind2: ' ').subfield('a').get.value
805
932
 
933
+ # Some additional love for journal signatures
934
+ signatures.map! do |signature|
935
+ # if this is a journal signature
936
+ if signature.try(:[], /\d+[A-Za-z]\d+$/).present?
937
+ # unless there is a leading standortkennziffer
938
+ unless signature.starts_with?('P')
939
+ standort_kennziffer = if (loc_standort_kennziffer = doc.field('LOC').subfield('b').get.value).present?
940
+ loc_standort_kennziffer
941
+ elsif (f105a = doc.field('105').subfield('a').get.value).present?
942
+ f105a
943
+ end
944
+
945
+ standort_kennziffer.present? ? "P#{standort_kennziffer}/#{signature}".gsub(/\/\//, '/') : signature
946
+ else
947
+ signature
948
+ end.downcase.capitalize # last but not least make journal signatures like P10/34T24 to P10/34t24
949
+ else
950
+ signature
951
+ end
952
+ end
953
+
806
954
  # Fertig. Wir nehmen die erste Signatur zur Anzeige
807
955
  signatures.flatten.map(&:presence).compact.uniq.first
808
956
  end
@@ -813,9 +961,10 @@ module Mabmapper
813
961
  # Stücktitel Signatur
814
962
  signatures << doc.field('100', ind2: ' ').subfield('a').get.value
815
963
  # Zeitschriftensignatur
816
- signatures << doc.field('200', ind2: ' ').subfield('f').get.value
964
+ signatures << doc.field('200', ind1: ' ', ind2: ' ').subfield('f').get.values
817
965
 
818
- signatures.flatten.map(&:presence).compact.map do |signature|
966
+ signatures = signatures.flatten.map(&:presence).compact
967
+ .map do |signature|
819
968
  _signature = signature
820
969
  .gsub(/\A\//, '') # remove leading '/' for some journal signatures
821
970
  .gsub(/\s+/, '') # remove spaces for some journal signatures (e.g. 'P 10/34 t 26')
@@ -826,7 +975,31 @@ module Mabmapper
826
975
 
827
976
  # for journals which only have one single signature with leading 'Pxx/' like 'P10/34M3' create 'Pxx/'-less version also
828
977
  _signature_array.push _signature.gsub(/\AP\d+\//, '')
829
- end.flatten.uniq
978
+ end.flatten
979
+
980
+ # if any signature is a journal signature
981
+ if (journal_signature = signatures.select { |signature| signature.try(:[], /\d+[A-Za-z]\d+$/).present? }.first).present?
982
+ if signatures.none? { |signature| signature.starts_with? 'P' }
983
+ # TODO: code duplication with :signature
984
+ standort_kennziffer = if (loc_standort_kennziffer = doc.field('LOC').subfield('b').get.value).present?
985
+ loc_standort_kennziffer
986
+ elsif (f105a = doc.field('105').subfield('a').get.value).present?
987
+ f105a
988
+ end
989
+
990
+ if standort_kennziffer.present?
991
+ signatures << "P#{standort_kennziffer}/#{journal_signature}".gsub(/\/\//, '/')
992
+ end
993
+ end
994
+ end
995
+
996
+ signatures.map! do |signature|
997
+ is_journal_signature = signature.match(/(P\d\d\/)?\d\d?[a-zA-Z]\d\d?/)
998
+ spaced_journal_signature = signature.gsub(/\AP(\d\d)/, 'P \1').gsub(/(\d\d?)([a-zA-Z])(\d\d?)/, '\1 \2 \3') if is_journal_signature
999
+ [signature, spaced_journal_signature]
1000
+ end.flatten!
1001
+
1002
+ signatures.flatten.map(&:presence).compact.uniq
830
1003
  end
831
1004
 
832
1005
  #
@@ -839,7 +1012,7 @@ module Mabmapper
839
1012
  field :resource_link do
840
1013
  fulltext_links = []
841
1014
 
842
- links = doc.field('655').subfield(['u', '3', 'z', 't']).get
1015
+ links = doc.field('655').subfield(['u', '3', 'z', 't']).get.fields
843
1016
  links.each do |link|
844
1017
  url = link.get_subfield('u').try(:value)
845
1018
  subfield_3 = link.get_subfield('3') # HBZ Inhaltsverzeichnisse
@@ -862,7 +1035,7 @@ module Mabmapper
862
1035
  field :link_to_toc do
863
1036
  toc_links = []
864
1037
 
865
- links = doc.field('655').subfield(['u', '3', 'z', 't']).get
1038
+ links = doc.field('655').subfield(['u', '3', 'z', 't']).get.fields
866
1039
  links.each do |link|
867
1040
  url = link.get_subfield('u').try(:value)
868
1041
  subfield_3 = link.get_subfield('3') # HBZ Inhaltsverzeichnisse
@@ -903,7 +1076,7 @@ module Mabmapper
903
1076
  # f - Signatur
904
1077
  r = []
905
1078
 
906
- fields = doc.field('200', ind2: ' ').subfield(['0', 'a', 'b', 'c', 'e', 'f']).get
1079
+ fields = doc.field('200', ind2: ' ').subfield(['0', 'a', 'b', 'c', 'e', 'f']).get.fields
907
1080
  fields.each do |field|
908
1081
  field_0 = field.get_subfield('0')
909
1082
  field_a = field.get_subfield('a')
@@ -1032,19 +1205,37 @@ module Mabmapper
1032
1205
  ].select { |superorder| superorder[:label].present? }.map(&:to_json).presence
1033
1206
  end
1034
1207
 
1208
+ field :local_comment do # unique values of subfields '_' and 'a' from field 125 with both indicators blank; nil when there are none
1209
+ doc.field('125', ind1: ' ', ind2: ' ').subfield(['_', 'a']).get.fields.map(&:values).flatten.uniq.presence
1210
+ end
1211
+
1035
1212
  #
1036
- # doc
1213
+ # additional_data
1214
+ # ( a complex data structure to be stored by the search engine; avoids to touch the normalization rules everytime )
1037
1215
  #
1038
-
1039
- # a complex data structure to be stored by the search engine
1040
- field :doc do
1041
- doc = {
1042
- :redactional_remark => ref(:redactional_remark)
1216
+ field :additional_data do
1217
+ additional_data = {
1218
+ author_statement: ref(:author_statement),
1219
+ corporate_body_contributor_display: ref(:corporate_body_contributor_display),
1220
+ corporate_body_creator_display: ref(:corporate_body_creator_display),
1221
+ local_comment: ref(:local_comment),
1222
+ person_contributor_display: ref(:person_contributor_display),
1223
+ person_creator_display: ref(:person_creator_display),
1224
+ redactional_remark: ref(:redactional_remark)
1225
+
1043
1226
  }
1044
1227
  .inject({}) { |hash, (key, value)| hash[key] = value if value.present?; hash }
1045
-
1046
- doc.to_json if doc.present?
1228
+
1229
+ additional_data.to_json if additional_data.present?
1047
1230
  end
1048
1231
 
1232
+ #
1233
+ # mab
1234
+ #
1235
+ #field :mab do
1236
+ # (filtered_xml = doc.xml.clone).xpath('/OAI-PMH/ListRecords/record/metadata/record/datafield[@tag="TXT" or @tag="PLK" or @tag="PSW" or @tag="PPE"]').remove
1237
+ # Base64.strict_encode64(ActiveSupport::Gzip.compress(filtered_xml, Zlib::BEST_COMPRESSION)) unless filtered_xml.nil?
1238
+ #end
1239
+
1049
1240
  end
1050
1241
  end
data/lib/mabmapper/cli.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  #
2
2
  # The command line interface class
3
3
  #
4
- require 'rubygems/test_utilities'
5
4
  require 'mabmapper/elasticsearch_writer'
6
5
  require 'mabmapper/tar_writer'
7
6
 
@@ -76,6 +76,8 @@ module Mabmapper
76
76
 
77
77
  # @see: http://www.dan-manges.com/blog/ruby-dsls-instance-eval-with-delegation
78
78
  class Field
79
+ attr_accessor :engine
80
+
79
81
  def initialize(name, &block)
80
82
  @name = name.to_s
81
83
  @proc = block
@@ -6,6 +6,8 @@ module Mabmapper
6
6
  class Document
7
7
  include QueryHelper
8
8
 
9
+ attr_accessor :xml
10
+
9
11
  def initialize(contents)
10
12
  @xml = Nokogiri::XML(contents)
11
13
  @xml.remove_namespaces!
@@ -85,7 +85,7 @@ module Mabmapper
85
85
 
86
86
  options = [*value].map do |value|
87
87
  if value
88
- negation = value.starts_with?('-') and value.length > 1
88
+ negation = value.starts_with?('-') && value.length > 1
89
89
  global_negation = true if negation
90
90
 
91
91
  negation ? "not(@#{name}='#{value.slice(1..-1)}')" : "@#{name}='#{value}'"
@@ -1,3 +1,3 @@
1
1
  module Mabmapper
2
- VERSION = "1.0.0.pre15"
2
+ VERSION = "1.0.0.pre16"
3
3
  end
data/mabmapper.gemspec CHANGED
@@ -27,7 +27,8 @@ Gem::Specification.new do |gem|
27
27
  gem.add_dependency('oj', '~> 2.1.4')
28
28
  gem.add_dependency('stringex', '~> 2.1.0')
29
29
 
30
- gem.add_development_dependency('minitest', '~> 4.7.5')
31
- gem.add_development_dependency('pry', '~> 0.9.12.2')
32
- gem.add_development_dependency('pry-nav', '~> 0.2.3')
30
+ gem.add_development_dependency('minitest', '~> 4.7.5')
31
+ gem.add_development_dependency('pry', '0.9.12.2') # stuck to 0.9.12.2 due to repl color issue
32
+ gem.add_development_dependency('pry-nav', '~> 0.2.3')
33
+ gem.add_development_dependency('pry-syntax-hacks', '~> 0.0.6')
33
34
  end