mabmapper 1.0.0.pre15 → 1.0.0.pre16

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +13 -0
  3. data/lib/mabmapper/aleph_mab_xml_engine.rb +244 -53
  4. data/lib/mabmapper/cli.rb +0 -1
  5. data/lib/mabmapper/engine.rb +2 -0
  6. data/lib/mabmapper/mab_xml/document.rb +2 -0
  7. data/lib/mabmapper/mab_xml/query_helper.rb +1 -1
  8. data/lib/mabmapper/version.rb +1 -1
  9. data/mabmapper.gemspec +4 -3
  10. data/test/mab_files/test_author_statement/PAD01.000307083.PRIMO.xml +529 -0
  11. data/test/mab_files/test_author_statement/PAD01.000317603.PRIMO.xml +396 -0
  12. data/test/mab_files/test_author_statement/PAD01.000323362.PRIMO.xml +313 -0
  13. data/test/mab_files/test_author_statement/PAD01.001100110.PRIMO.xml +139 -0
  14. data/test/mab_files/test_corporate_body_contributor_display/PAD01.000000003.PRIMO.xml +171 -0
  15. data/test/mab_files/test_corporate_body_contributor_display/PAD01.000017278.PRIMO.xml +517 -0
  16. data/test/mab_files/test_corporate_body_contributor_display/PAD01.000023677.PRIMO.xml +181 -0
  17. data/test/mab_files/test_corporate_body_contributor_display/PAD01.000301195.PRIMO.xml +130 -0
  18. data/test/mab_files/test_corporate_body_contributor_display/PAD01.000335160.PRIMO.xml +4268 -0
  19. data/test/mab_files/test_corporate_body_creator_display/PAD01.000017864.PRIMO.xml +1931 -0
  20. data/test/mab_files/test_corporate_body_creator_display/PAD01.000102057.PRIMO.xml +137 -0
  21. data/test/mab_files/test_corporate_body_creator_display/PAD01.000129575.PRIMO.xml +105 -0
  22. data/test/mab_files/test_corporate_body_creator_display/PAD01.000143976.PRIMO.xml +161 -0
  23. data/test/mab_files/test_corporate_body_creator_display/PAD01.000147985.PRIMO.xml +925 -0
  24. data/test/mab_files/test_description/PAD01.001510737.PRIMO.xml +317 -0
  25. data/test/mab_files/test_local_comment/PAD01.001212891.PRIMO.xml +124 -0
  26. data/test/mab_files/test_local_comment/PAD01.001572143.PRIMO.xml +311 -0
  27. data/test/mab_files/test_person_contributor_display/PAD01.000322492.PRIMO.xml +120 -0
  28. data/test/mab_files/test_person_contributor_display/PAD01.001494977.PRIMO.xml +192 -0
  29. data/test/mab_files/test_person_contributor_display/PAD01.001495118.PRIMO.xml +480 -0
  30. data/test/mab_files/test_person_creator_display/PAD01.000007018.PRIMO.xml +216 -0
  31. data/test/mab_files/test_person_creator_display/PAD01.000135071.PRIMO.xml +195 -0
  32. data/test/mab_files/test_relation/PAD01.000968502.PRIMO.xml +164 -0
  33. data/test/mab_files/test_short_title_display/PAD01.000452919.PRIMO.xml +549 -0
  34. data/test/mab_files/test_short_title_display/PAD01.000558925.PRIMO.xml +527 -0
  35. data/test/mab_files/test_short_title_display/PAD01.000605735.PRIMO.xml +653 -0
  36. data/test/mab_files/test_short_title_display/PAD01.000695045.PRIMO.xml +600 -0
  37. data/test/mab_files/test_short_title_display/PAD01.000897969.PRIMO.xml +726 -0
  38. data/test/mab_files/test_short_title_display/PAD01.000998195.PRIMO.xml +463 -0
  39. data/test/mab_files/test_short_title_display/PAD01.001209722.PRIMO.xml +163 -0
  40. data/test/mab_files/test_short_title_display/PAD01.001209723.PRIMO.xml +180 -0
  41. data/test/mab_files/test_signature/PAD01.000318290.PRIMO.xml +1635 -0
  42. data/test/mab_files/test_signature/PAD01.000695094.PRIMO.xml +342 -0
  43. data/test/mab_files/test_signature/PAD01.000765779.PRIMO.xml +437 -0
  44. data/test/mab_files/test_signature/PAD01.000869906.PRIMO.xml +128 -0
  45. data/test/mab_files/test_signature/PAD01.000897969.PRIMO.xml +726 -0
  46. data/test/mab_files/test_signature/PAD01.000998195.PRIMO.xml +463 -0
  47. data/test/mab_files/test_signature/PAD01.001414237.PRIMO.xml +162 -0
  48. data/test/mab_files/test_signature/PAD01.001518782.PRIMO.xml +136 -0
  49. data/test/mab_files/test_signature_search/PAD01.000318290.PRIMO.xml +1635 -0
  50. data/test/mab_files/test_signature_search/PAD01.000452919.PRIMO.xml +549 -0
  51. data/test/mab_files/test_signature_search/PAD01.000695094.PRIMO.xml +342 -0
  52. data/test/mab_files/test_signature_search/PAD01.000765779.PRIMO.xml +437 -0
  53. data/test/mab_files/test_signature_search/PAD01.000869906.PRIMO.xml +128 -0
  54. data/test/mab_files/test_signature_search/PAD01.000897969.PRIMO.xml +726 -0
  55. data/test/mab_files/test_signature_search/PAD01.000998195.PRIMO.xml +463 -0
  56. data/test/mab_files/test_signature_search/PAD01.001414237.PRIMO.xml +162 -0
  57. data/test/mab_files/test_signature_search/PAD01.001518782.PRIMO.xml +136 -0
  58. data/test/mab_files/test_status/PAD01.000898036.PRIMO.xml +43 -0
  59. data/test/mab_files/test_superorder_display/PAD01.001559463.PRIMO.xml +124 -0
  60. data/test/mab_files/test_title_search/PAD01.000253702.PRIMO.xml +795 -0
  61. data/test/mab_files/test_title_search/PAD01.001584494.PRIMO.xml +2163 -0
  62. data/test/mabmapper/test_author_statement.rb +14 -0
  63. data/test/mabmapper/test_corporate_body_contributor_display.rb +10 -0
  64. data/test/mabmapper/test_corporate_body_creator_display.rb +27 -0
  65. data/test/mabmapper/test_description.rb +3 -0
  66. data/test/mabmapper/test_doc.rb +4 -3
  67. data/test/mabmapper/test_local_comment.rb +5 -0
  68. data/test/mabmapper/test_person_contributor_display.rb +6 -0
  69. data/test/mabmapper/test_person_creator_display.rb +5 -0
  70. data/test/mabmapper/test_relation.rb +3 -0
  71. data/test/mabmapper/test_short_title_display.rb +18 -1
  72. data/test/mabmapper/test_signature.rb +16 -0
  73. data/test/mabmapper/test_signature_search.rb +20 -4
  74. data/test/mabmapper/test_status.rb +3 -0
  75. data/test/mabmapper/test_superorder_display.rb +3 -0
  76. data/test/mabmapper/test_title_search.rb +35 -0
  77. metadata +154 -23
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: edfa34f2b352b77bf53e7f581e3d60e8c59c84bf
4
- data.tar.gz: 4dad71a39bddd155c2c3595a0995278a882e67a1
3
+ metadata.gz: c386816c140e51c20adc3f14f7a9e3366f5c0fe8
4
+ data.tar.gz: 1712293a68164eea826a58fe020ffb717d130411
5
5
  SHA512:
6
- metadata.gz: af04565ff5c6bd6fa406a124b865fd412353e3bae166cb798054cf79ec66e9632416269753539a5f3d2201f0c9831e30c702f31416d481efc6bde4392c3ecb7b
7
- data.tar.gz: 8445b5bc982ee1254c110cf30b93508703c11b4d1f2d4dc6868b061fc7fd6bf86dffb18144d667d041c35940c35f06d36dc41ce5e960dfa474816b67e7701661
6
+ metadata.gz: c8fbde45856cb171d4a762f477a019ae6a1958e7061ed0511f47ab08d977679d13d599f8166f049bdf2e53f51f13ac1bc02e69d0de32e84babab971429a317be
7
+ data.tar.gz: 761f5ed07daec64d18e6d53aac08f6b886026600cdca9762569b42933288d25d43f0fcbf03bb9bccbbcee509e521fcd0c2f2768e798be0b8f4f4fe8358be0102
data/CHANGELOG.md ADDED
@@ -0,0 +1,13 @@
1
+ Changelog
2
+ =========
3
+
4
+ * [2013-12-19] Added 334 to short_title_display
5
+ * [2013-12-18] Take all aleph expanded 200er fields and their order into account for signature
6
+ * [2013-11-29] Do not remove superorder_display entries without valid ht_number, just check for label existence
7
+ * [2013-11-29] Added author_statement field (mab 359)
8
+ * [2013-11-29] Added person_creator_display field
9
+ * [2013-11-29] Added person_contributor_display field
10
+ * [2013-11-29] Added corporate_body_creator_display field
11
+ * [2013-11-29] Added corporate_body_contributor_display field
12
+ * [2013-11-27] For short_title_display, take mab 310 into account
13
+ * [2013-11-27] Fixed handling of ind1: ['-', ...] (the '-' caused trouble)
@@ -24,6 +24,8 @@ module Mabmapper
24
24
  # Standort Detmold unterdrücken
25
25
  detmold_locations = doc.field('LOC').subfield('n').get.values.flatten
26
26
  value = 'D' if detmold_locations.present? && detmold_locations.all?{|v| v == '50'}
27
+ # Interimsaufnahmen unterdrücken
28
+ value = 'D' if doc.field('537', ind1: '-', ind2: '1').subfield('a').get.values.flatten.any? { |v| v.downcase.include? 'interimsaufnahme' }
27
29
 
28
30
  value
29
31
  end
@@ -161,14 +163,21 @@ module Mabmapper
161
163
  end
162
164
 
163
165
  field :short_title_display do
164
- # 331 => Titel, 335 Titelzusatz (ind2 == 1 -> betrachteter Titel, ind2 == 2 -> Überordnung)
165
- f089_1 = doc.field('089', ind2: '1').subfield('a').get.value
166
- f331_1 = doc.field('331', ind2: '1').subfield('a').get.value
167
- f331_2 = doc.field('331', ind2: '2').subfield('a').get.value
168
- f335_1 = doc.field('335', ind2: '1').subfield('a').get.value
169
-
170
- short_title = if (f331_1 && f335_1)
166
+ f089_1 = doc.field('089', ind2: '1').subfield('a').get.value # Bandangabe in Vorlageform
167
+ f310_1 = doc.field('310', ind1: ['-', 'a'], ind2: '1').subfield('a').get.value # Hauptsachtitel in Ansetzungsform
168
+ f331_1 = doc.field('331', ind2: '1').subfield('a').get.value # Hauptsachtitel in Vorlageform oder Mischform
169
+ f331_2 = doc.field('331', ind2: '2').subfield('a').get.value #
170
+ f334_1 = doc.field('334', ind1: '-', ind2: '1').get.value # Allgemeine Materialbenennung
171
+ f335_1 = doc.field('335', ind2: '1').subfield('a').get.value # Zusätze zum Hauptsachtitel
172
+
173
+ short_title = if f310_1
174
+ f310_1
175
+ elsif (f331_1 && f334_1 && f335_1)
176
+ "#{f331_1} [#{f334_1}] : #{f335_1}"
177
+ elsif (f331_1 && f335_1)
171
178
  "#{f331_1} : #{f335_1}"
179
+ elsif f331_1 && f334_1
180
+ "#{f331_1} [#{f334_1}]"
172
181
  elsif f331_1
173
182
  f331_1
174
183
  elsif f089_1 && f089_1.length > 3 && f089_1[/\A(\d|\s)+\Z/].nil? && !['buch', 'hauptbd.'].include?(f089_1.gsub(/\[|\]/, '').downcase)
@@ -209,13 +218,115 @@ module Mabmapper
209
218
  search_titles << doc.field('627', ind2: '1').subfield('a').get.values
210
219
  search_titles << doc.field('633', ind2: '1').subfield('a').get.values
211
220
 
212
- search_titles.flatten.map(&:presence).compact.uniq
221
+ search_titles
222
+ .flatten
223
+ .compact
224
+ .map do |search_title|
225
+ # add entries without dashes in words, if words with dashes are present
226
+ if search_title.match(/[A-ZÄÖÜ][a-zäöü]+\-[A-ZÄÖÜ][a-zäöü]+/)
227
+ [search_title, search_title.split(' ').map { |string| string.gsub(/([A-ZÄÖÜ][a-zäöü]+)\-([A-ZÄÖÜ][a-zäöü]+)/, '\1\2').downcase.capitalize }.join(' ')]
228
+ else
229
+ search_title
230
+ end
231
+ end
232
+ .flatten
233
+ .map(&:presence).compact.uniq
234
+ end
235
+
236
+
237
+ #
238
+ # creator
239
+ #
240
+ field :person_creator_display do
241
+ creators = []
242
+
243
+ # Personen
244
+ (100..196).step(4) do |f|
245
+ doc.field("#{f}", ind1: ['-', 'a'], ind2: ['1', '2']).get.fields.each do |field|
246
+ creators << (field.get_subfield('a').try(:value) || field.get_subfield('p').try(:value) || field.get_subfield('c').try(:value))
247
+ end
248
+ end
249
+
250
+ creators.map(&:presence).compact.uniq
251
+ end
252
+
253
+ def self.corporate_body_from_field(field)
254
+ subfield_a = field.get_subfield('a').try(:value) # Körperschafts-/Kongressname/Geografikum ohne IDN-Verknüpfung (NW)
255
+ subfield_b = field.get_subfield('b').try(:value) # Unterordnung
256
+ subfield_c = field.get_subfield('c').try(:value) # Ort (NW)
257
+ subfield_d = field.get_subfield('d').try(:value) # Datum (NW)
258
+ subfield_e = field.get_subfield('e').try(:value) # Kongressname (NW)
259
+ subfield_g = field.get_subfield('g').try(:value) # Name des Geografikums (NW)
260
+ subfield_h = field.get_subfield('h').try(:value) # Zusatz
261
+ subfield_k = field.get_subfield('k').try(:value) # Körperschaftsname (NW)
262
+ subfield_n = field.get_subfield('n').try(:value) # Zählung (W)
263
+ subfield_x = field.get_subfield('x').try(:value) # nachgeordneter Teil (W)
264
+ subfield_z = field.get_subfield('x').try(:value) # geografische Unterteilung (W)
265
+
266
+ if !subfield_a && subfield_b && !subfield_c && !subfield_e && !subfield_g && subfield_h && subfield_k && !subfield_x && !subfield_z
267
+ "#{subfield_k} <#{subfield_h}> / #{subfield_b}"
268
+ else
269
+ [
270
+ subfield_a,
271
+ subfield_k,
272
+ subfield_e,
273
+ subfield_g,
274
+ subfield_b ? "/ #{subfield_b}" : nil,
275
+ "<#{[subfield_h, subfield_n, subfield_d, subfield_c, subfield_x, subfield_z].compact.join(', ').presence}>",
276
+ ].compact.join(' ').try(:sub, '<>', '').try(:strip)
277
+ end
278
+ end
279
+
280
+ field :corporate_body_creator_display do
281
+ creators = []
282
+
283
+ # Körperschaften
284
+ (200..296).step(4) do |f|
285
+ doc.field("#{f}", ind1: ['-', 'a'], ind2: ['1', '2']).get.fields.each do |field|
286
+ creators << engine.corporate_body_from_field(field)
287
+ end
288
+ end
289
+
290
+ creators.map(&:presence).compact.uniq
213
291
  end
214
292
 
293
+ field :author_statement do
294
+ f359_1 = doc.field('359', ind1:"-", ind2:'1').subfield('a').get.values.flatten.presence
295
+ f359_2 = doc.field('359', ind1:"-", ind2:'2').subfield('a').get.values.flatten.presence
296
+ f359_1 || f359_2
297
+ end
215
298
 
216
299
  #
217
- # Creator
300
+ # contributor
218
301
  #
302
+ field :person_contributor_display do
303
+ contributors = []
304
+
305
+ # Personen
306
+ (100..196).step(4) do |f|
307
+ doc.field("#{f}", ind1: ['b', 'c', 'e', 'f'], ind2: ['1', '2']).get.fields.each do |field|
308
+ name = (field.get_subfield('a').try(:value) || field.get_subfield('p').try(:value) || field.get_subfield('c').try(:value))
309
+ action_designator = field.get_subfield('b').try(:value)
310
+ contributors << [name, action_designator].map(&:presence).compact.join(' ')
311
+ end
312
+ end
313
+
314
+ contributors.map(&:presence).compact.uniq
315
+ end
316
+
317
+ field :corporate_body_contributor_display do
318
+ contributors = []
319
+
320
+ # Körperschaften
321
+ (200..296).step(4) do |f|
322
+ doc.field("#{f}", ind1: ['b', 'c', 'e', 'f'], ind2: ['1', '2']).get.fields.each do |field|
323
+ contributors << engine.corporate_body_from_field(field)
324
+ end
325
+ end
326
+
327
+ contributors.map(&:presence).compact.uniq
328
+ end
329
+
219
330
  field :creator_contributor_display do
220
331
  creators = []
221
332
 
@@ -491,33 +602,6 @@ module Mabmapper
491
602
  ddc_fields.flatten.map(&:presence).compact.uniq
492
603
  end
493
604
 
494
- #
495
- # Description
496
- #
497
- field :description do
498
- descriptions = []
499
-
500
- # 405 - Erscheinungsverlauf von Zeitschriften
501
- descriptions << doc.field('405', ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
502
-
503
- # 522 - Teilungsvermerk bei fortlaufenden Sammelwerken
504
- descriptions << doc.field('522', ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
505
-
506
- # 523 - Erscheinungsverlauf von Monos
507
- descriptions << doc.field('523', ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
508
-
509
- (501..519).each do |f|
510
- descriptions << doc.field("#{f}", ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
511
- end
512
-
513
- (536..537).each do |f|
514
- descriptions << doc.field("#{f}", ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
515
- end
516
-
517
- # Finally...
518
- descriptions.flatten.map(&:presence).compact.uniq
519
- end
520
-
521
605
  #
522
606
  # Abstracts
523
607
  #
@@ -553,6 +637,13 @@ module Mabmapper
553
637
  }
554
638
  end
555
639
 
640
+ if (f022a = doc.field('022').subfield('a').get.value).present?
641
+ relations << {
642
+ ht_number: f022a,
643
+ label: 'Sekundärform'
644
+ }
645
+ end
646
+
556
647
  (526..534).each do |mab_field_number|
557
648
  doc.field("#{mab_field_number}", ind2: '1').get.fields.each do |field|
558
649
  ht_number = field.get_subfield('9').try(:value).presence
@@ -592,7 +683,7 @@ module Mabmapper
592
683
 
593
684
  superorders
594
685
  .map(&:presence)
595
- .delete_if { |element| element[:ht_number].blank? }
686
+ .delete_if { |element| element[:label].blank? }
596
687
  .each do |element|
597
688
  # remove 'not sort' indicators from label
598
689
  element[:label].try(:gsub!, /<<|>>/, '')
@@ -705,6 +796,33 @@ module Mabmapper
705
796
  type
706
797
  end
707
798
 
799
+ #
800
+ # Description
801
+ #
802
+ field :description do
803
+ descriptions = []
804
+
805
+ # 405 - Erscheinungsverlauf von Zeitschriften
806
+ descriptions << doc.field('405', ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
807
+
808
+ # 522 - Teilungsvermerk bei fortlaufenden Sammelwerken
809
+ descriptions << doc.field('522', ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
810
+
811
+ # 523 - Erscheinungsverlauf von Monos
812
+ descriptions << doc.field('523', ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
813
+
814
+ (501..519).each do |f|
815
+ descriptions << doc.field("#{f}", ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ')
816
+ end
817
+
818
+ (536..537).each do |f|
819
+ descriptions << doc.field("#{f}", ind2: '1').subfield(['a', 'p']).get.values(join_subfields: ': ') unless f == 537 && ref(:erscheinungsform) == "journal"
820
+ end
821
+
822
+ # Finally...
823
+ descriptions.flatten.map(&:presence).compact.uniq
824
+ end
825
+
708
826
  #
709
827
  # Delivery Category
710
828
  #
@@ -733,7 +851,7 @@ module Mabmapper
733
851
  # Notation
734
852
  #
735
853
  field :notation do
736
- doc.field('700', ind2: ' ').subfield('a').get.values(join_subfields: true)
854
+ doc.field('700', ind2: ' ').subfield('a').get.values(join_subfields: '')
737
855
  end
738
856
 
739
857
  field :notation_sort do
@@ -761,7 +879,16 @@ module Mabmapper
761
879
  all_stack = fields.map{|f| f.subfields.find {|sf| sf.name == 'b' && sf.value.match(/02|03|04|07/)}.present?}.all?
762
880
 
763
881
  # Zeitschriftensignatur (haben Vorrang, falls vorhanden)
764
- signatures << doc.field('200', ind2: ' ').subfield('f').get.value.try(:gsub, ' ', '')
882
+ #
883
+ # Achtung, bei Feld 200 handelt es sich um einen Aleph-Expand. Dieses Feld ist an den beiden leeren Indikatoren zu erkennen.
884
+ # Darüber hinaus kann dieses Feld mehrfach vorkommen. Wir nehmen an, dass Subfeld 0 eine Art Zählung angibt, weshalb dort
885
+ # ein Wert von '1' zu bevorzugen ist.
886
+ #
887
+ signatures << doc.field('200', ind1: ' ', ind2: ' ').get.fields
888
+ .select { |f| f.get_subfield('f').present? }
889
+ .select { |f| (value = f.get_subfield('0').try(:value)) == '1' || value.nil? }
890
+ .map { |f| f.get_subfield('f').value.try(:gsub, ' ', '') }
891
+ .first.presence
765
892
 
766
893
  # Wenn alle Exemplare im Magazin stehen, dann nimm nur die erste Signatur
767
894
  if all_stack
@@ -803,6 +930,27 @@ module Mabmapper
803
930
  # Stücktitel Signatur
804
931
  signatures << doc.field('100', ind2: ' ').subfield('a').get.value
805
932
 
933
+ # Some additional love for journal signatures
934
+ signatures.map! do |signature|
935
+ # if this is a journal signature
936
+ if signature.try(:[], /\d+[A-Za-z]\d+$/).present?
937
+ # unless there is a leading standortkennziffer
938
+ unless signature.starts_with?('P')
939
+ standort_kennziffer = if (loc_standort_kennziffer = doc.field('LOC').subfield('b').get.value).present?
940
+ loc_standort_kennziffer
941
+ elsif (f105a = doc.field('105').subfield('a').get.value).present?
942
+ f105a
943
+ end
944
+
945
+ standort_kennziffer.present? ? "P#{standort_kennziffer}/#{signature}".gsub(/\/\//, '/') : signature
946
+ else
947
+ signature
948
+ end.downcase.capitalize # last but not least make journal signatures like P10/34T24 to P10/34t24
949
+ else
950
+ signature
951
+ end
952
+ end
953
+
806
954
  # Fertig. Wir nehmen die erste Signatur zur Anzeige
807
955
  signatures.flatten.map(&:presence).compact.uniq.first
808
956
  end
@@ -813,9 +961,10 @@ module Mabmapper
813
961
  # Stücktitel Signatur
814
962
  signatures << doc.field('100', ind2: ' ').subfield('a').get.value
815
963
  # Zeitschriftensignatur
816
- signatures << doc.field('200', ind2: ' ').subfield('f').get.value
964
+ signatures << doc.field('200', ind1: ' ', ind2: ' ').subfield('f').get.values
817
965
 
818
- signatures.flatten.map(&:presence).compact.map do |signature|
966
+ signatures = signatures.flatten.map(&:presence).compact
967
+ .map do |signature|
819
968
  _signature = signature
820
969
  .gsub(/\A\//, '') # remove leading '/' for some journal signatures
821
970
  .gsub(/\s+/, '') # remove spaces for some journal signatures (e.g. 'P 10/34 t 26')
@@ -826,7 +975,31 @@ module Mabmapper
826
975
 
827
976
  # for journals which only have one single signature with leading 'Pxx/' like 'P10/34M3' create 'Pxx/'-less version also
828
977
  _signature_array.push _signature.gsub(/\AP\d+\//, '')
829
- end.flatten.uniq
978
+ end.flatten
979
+
980
+ # if any signature is a journal signature
981
+ if (journal_signature = signatures.select { |signature| signature.try(:[], /\d+[A-Za-z]\d+$/).present? }.first).present?
982
+ if signatures.none? { |signature| signature.starts_with? 'P' }
983
+ # TODO: code duplication with :signature
984
+ standort_kennziffer = if (loc_standort_kennziffer = doc.field('LOC').subfield('b').get.value).present?
985
+ loc_standort_kennziffer
986
+ elsif (f105a = doc.field('105').subfield('a').get.value).present?
987
+ f105a
988
+ end
989
+
990
+ if standort_kennziffer.present?
991
+ signatures << "P#{standort_kennziffer}/#{journal_signature}".gsub(/\/\//, '/')
992
+ end
993
+ end
994
+ end
995
+
996
+ signatures.map! do |signature|
997
+ is_journal_signature = signature.match(/(P\d\d\/)?\d\d?[a-zA-Z]\d\d?/)
998
+ spaced_journal_signature = signature.gsub(/\AP(\d\d)/, 'P \1').gsub(/(\d\d?)([a-zA-Z])(\d\d?)/, '\1 \2 \3') if is_journal_signature
999
+ [signature, spaced_journal_signature]
1000
+ end.flatten!
1001
+
1002
+ signatures.flatten.map(&:presence).compact.uniq
830
1003
  end
831
1004
 
832
1005
  #
@@ -839,7 +1012,7 @@ module Mabmapper
839
1012
  field :resource_link do
840
1013
  fulltext_links = []
841
1014
 
842
- links = doc.field('655').subfield(['u', '3', 'z', 't']).get
1015
+ links = doc.field('655').subfield(['u', '3', 'z', 't']).get.fields
843
1016
  links.each do |link|
844
1017
  url = link.get_subfield('u').try(:value)
845
1018
  subfield_3 = link.get_subfield('3') # HBZ Inhaltsverzeichnisse
@@ -862,7 +1035,7 @@ module Mabmapper
862
1035
  field :link_to_toc do
863
1036
  toc_links = []
864
1037
 
865
- links = doc.field('655').subfield(['u', '3', 'z', 't']).get
1038
+ links = doc.field('655').subfield(['u', '3', 'z', 't']).get.fields
866
1039
  links.each do |link|
867
1040
  url = link.get_subfield('u').try(:value)
868
1041
  subfield_3 = link.get_subfield('3') # HBZ Inhaltsverzeichnisse
@@ -903,7 +1076,7 @@ module Mabmapper
903
1076
  # f - Signatur
904
1077
  r = []
905
1078
 
906
- fields = doc.field('200', ind2: ' ').subfield(['0', 'a', 'b', 'c', 'e', 'f']).get
1079
+ fields = doc.field('200', ind2: ' ').subfield(['0', 'a', 'b', 'c', 'e', 'f']).get.fields
907
1080
  fields.each do |field|
908
1081
  field_0 = field.get_subfield('0')
909
1082
  field_a = field.get_subfield('a')
@@ -1032,19 +1205,37 @@ module Mabmapper
1032
1205
  ].select { |superorder| superorder[:label].present? }.map(&:to_json).presence
1033
1206
  end
1034
1207
 
1208
+ field :local_comment do
1209
+ doc.field('125', ind1: ' ', ind2: ' ').subfield(['_', 'a']).get.fields.map(&:values).flatten.uniq.presence
1210
+ end
1211
+
1035
1212
  #
1036
- # doc
1213
+ # additional_data
1214
+ # ( a complex data structure to be stored by the search engine; avoids touching the normalization rules every time )
1037
1215
  #
1038
-
1039
- # a complex data structure to be stored by the search engine
1040
- field :doc do
1041
- doc = {
1042
- :redactional_remark => ref(:redactional_remark)
1216
+ field :additional_data do
1217
+ additional_data = {
1218
+ author_statement: ref(:author_statement),
1219
+ corporate_body_contributor_display: ref(:corporate_body_contributor_display),
1220
+ corporate_body_creator_display: ref(:corporate_body_creator_display),
1221
+ local_comment: ref(:local_comment),
1222
+ person_contributor_display: ref(:person_contributor_display),
1223
+ person_creator_display: ref(:person_creator_display),
1224
+ redactional_remark: ref(:redactional_remark)
1225
+
1043
1226
  }
1044
1227
  .inject({}) { |hash, (key, value)| hash[key] = value if value.present?; hash }
1045
-
1046
- doc.to_json if doc.present?
1228
+
1229
+ additional_data.to_json if additional_data.present?
1047
1230
  end
1048
1231
 
1232
+ #
1233
+ # mab
1234
+ #
1235
+ #field :mab do
1236
+ # (filtered_xml = doc.xml.clone).xpath('/OAI-PMH/ListRecords/record/metadata/record/datafield[@tag="TXT" or @tag="PLK" or @tag="PSW" or @tag="PPE"]').remove
1237
+ # Base64.strict_encode64(ActiveSupport::Gzip.compress(filtered_xml, Zlib::BEST_COMPRESSION)) unless filtered_xml.nil?
1238
+ #end
1239
+
1049
1240
  end
1050
1241
  end
data/lib/mabmapper/cli.rb CHANGED
@@ -1,7 +1,6 @@
1
1
  #
2
2
  # The command line interface class
3
3
  #
4
- require 'rubygems/test_utilities'
5
4
  require 'mabmapper/elasticsearch_writer'
6
5
  require 'mabmapper/tar_writer'
7
6
 
@@ -76,6 +76,8 @@ module Mabmapper
76
76
 
77
77
  # @see: http://www.dan-manges.com/blog/ruby-dsls-instance-eval-with-delegation
78
78
  class Field
79
+ attr_accessor :engine
80
+
79
81
  def initialize(name, &block)
80
82
  @name = name.to_s
81
83
  @proc = block
@@ -6,6 +6,8 @@ module Mabmapper
6
6
  class Document
7
7
  include QueryHelper
8
8
 
9
+ attr_accessor :xml
10
+
9
11
  def initialize(contents)
10
12
  @xml = Nokogiri::XML(contents)
11
13
  @xml.remove_namespaces!
@@ -85,7 +85,7 @@ module Mabmapper
85
85
 
86
86
  options = [*value].map do |value|
87
87
  if value
88
- negation = value.starts_with?('-') and value.length > 1
88
+ negation = value.starts_with?('-') && value.length > 1
89
89
  global_negation = true if negation
90
90
 
91
91
  negation ? "not(@#{name}='#{value.slice(1..-1)}')" : "@#{name}='#{value}'"
@@ -1,3 +1,3 @@
1
1
  module Mabmapper
2
- VERSION = "1.0.0.pre15"
2
+ VERSION = "1.0.0.pre16"
3
3
  end
data/mabmapper.gemspec CHANGED
@@ -27,7 +27,8 @@ Gem::Specification.new do |gem|
27
27
  gem.add_dependency('oj', '~> 2.1.4')
28
28
  gem.add_dependency('stringex', '~> 2.1.0')
29
29
 
30
- gem.add_development_dependency('minitest', '~> 4.7.5')
31
- gem.add_development_dependency('pry', '~> 0.9.12.2')
32
- gem.add_development_dependency('pry-nav', '~> 0.2.3')
30
+ gem.add_development_dependency('minitest', '~> 4.7.5')
31
+ gem.add_development_dependency('pry', '0.9.12.2') # stuck to 0.9.12.2 due to repl color issue
32
+ gem.add_development_dependency('pry-nav', '~> 0.2.3')
33
+ gem.add_development_dependency('pry-syntax-hacks', '~> 0.0.6')
33
34
  end