pennmarc 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +23 -0
  6. data/Gemfile.lock +119 -0
  7. data/README.md +82 -0
  8. data/legacy/indexer.rb +568 -0
  9. data/legacy/marc.rb +2964 -0
  10. data/legacy/test_file_output.json +49 -0
  11. data/lib/pennmarc/encoding_level.rb +43 -0
  12. data/lib/pennmarc/enriched_marc.rb +36 -0
  13. data/lib/pennmarc/heading_control.rb +11 -0
  14. data/lib/pennmarc/helpers/citation.rb +31 -0
  15. data/lib/pennmarc/helpers/creator.rb +237 -0
  16. data/lib/pennmarc/helpers/database.rb +89 -0
  17. data/lib/pennmarc/helpers/date.rb +85 -0
  18. data/lib/pennmarc/helpers/edition.rb +90 -0
  19. data/lib/pennmarc/helpers/format.rb +312 -0
  20. data/lib/pennmarc/helpers/genre.rb +71 -0
  21. data/lib/pennmarc/helpers/helper.rb +11 -0
  22. data/lib/pennmarc/helpers/identifier.rb +134 -0
  23. data/lib/pennmarc/helpers/language.rb +37 -0
  24. data/lib/pennmarc/helpers/link.rb +12 -0
  25. data/lib/pennmarc/helpers/location.rb +97 -0
  26. data/lib/pennmarc/helpers/note.rb +132 -0
  27. data/lib/pennmarc/helpers/production.rb +131 -0
  28. data/lib/pennmarc/helpers/relation.rb +135 -0
  29. data/lib/pennmarc/helpers/series.rb +118 -0
  30. data/lib/pennmarc/helpers/subject.rb +304 -0
  31. data/lib/pennmarc/helpers/title.rb +197 -0
  32. data/lib/pennmarc/mappings/language.yml +516 -0
  33. data/lib/pennmarc/mappings/locations.yml +1801 -0
  34. data/lib/pennmarc/mappings/relator.yml +263 -0
  35. data/lib/pennmarc/parser.rb +177 -0
  36. data/lib/pennmarc/util.rb +240 -0
  37. data/lib/pennmarc.rb +6 -0
  38. data/pennmarc.gemspec +22 -0
  39. data/spec/fixtures/marcxml/test.xml +167 -0
  40. data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
  41. data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
  42. data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
  43. data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
  44. data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
  45. data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
  46. data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
  47. data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
  48. data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
  49. data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
  50. data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
  51. data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
  52. data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
  53. data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
  54. data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
  55. data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
  56. data/spec/lib/pennmarc/parser_spec.rb +13 -0
  57. data/spec/spec_helper.rb +104 -0
  58. data/spec/support/marc_spec_helpers.rb +84 -0
  59. metadata +171 -0
@@ -0,0 +1,49 @@
1
+ { "id":["FRANKLIN_9910148543503681"],
2
+ "grouped_id":["18521155!FRANKLIN_9910148543503681"],
3
+ "record_source_id":[1],
4
+ "record_source_f":["Penn"],
5
+ "nocirc_f_stored":["none"],
6
+ "alma_mms_id":["9910148543503681"],
7
+ "oclc_id":["18521155"],
8
+ "cluster_id":["18521155"],
9
+ "marcrecord_xml_stored_single_large":["<record><leader>01908cam a2200469 a 4500</leader><controlfield tag=\"005\">20220609191957.0</controlfield><controlfield tag=\"008\">890224t19891989nyua 001 0 eng </controlfield><controlfield tag=\"001\">9910148543503681</controlfield><datafield tag=\"010\" ind1=\" \" ind2=\" \"><subfield code=\"a\"> 88029825 </subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(OCoLC)ocm18521155</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(OCoLC)18521155</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(CStRLIN)PAUG89-B7246</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(CaOTULAS)185188489</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"9\">AHA6856</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">1014854</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(PU)1014854-penndb-Voyager</subfield></datafield><datafield tag=\"040\" ind1=\" \" ind2=\" \"><subfield code=\"b\">eng</subfield><subfield code=\"d\">CSt-B</subfield></datafield><datafield tag=\"043\" ind1=\" \" ind2=\" \"><subfield code=\"a\">n-us---</subfield></datafield><datafield tag=\"050\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">KF6450</subfield><subfield code=\"b\">.C59 1989</subfield></datafield><datafield tag=\"082\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">343.7306/8</subfield><subfield code=\"a\">347.30368</subfield><subfield code=\"2\">19</subfield></datafield><datafield tag=\"090\" ind1=\" \" ind2=\" \"><subfield code=\"a\">KF6450</subfield><subfield code=\"b\">.C59 1989</subfield><subfield code=\"i\">11/30/89 CTZ</subfield></datafield><datafield tag=\"245\" ind1=\"0\" ind2=\"4\"><subfield code=\"a\">The Coopers &amp; Lybrand guide to business tax strategies and planning /</subfield><subfield code=\"c\">by 
the partners of Coopers &amp; Lybrand.</subfield></datafield><datafield tag=\"264\" ind1=\" \" ind2=\"1\"><subfield code=\"a\">New York :</subfield><subfield code=\"b\">Simon and Schuster,</subfield><subfield code=\"c\">[1989]</subfield></datafield><datafield tag=\"264\" ind1=\" \" ind2=\"4\"><subfield code=\"c\">©1989</subfield></datafield><datafield tag=\"300\" ind1=\" \" ind2=\" \"><subfield code=\"a\">x, 198 pages :</subfield><subfield code=\"b\">illustrations ;</subfield><subfield code=\"c\">24 cm</subfield></datafield><datafield tag=\"336\" ind1=\" \" ind2=\" \"><subfield code=\"a\">text</subfield><subfield code=\"b\">txt</subfield><subfield code=\"2\">rdacontent</subfield></datafield><datafield tag=\"337\" ind1=\" \" ind2=\" \"><subfield code=\"a\">unmediated</subfield><subfield code=\"b\">n</subfield><subfield code=\"2\">rdamedia</subfield></datafield><datafield tag=\"338\" ind1=\" \" ind2=\" \"><subfield code=\"a\">volume</subfield><subfield code=\"b\">nc</subfield><subfield code=\"2\">rdacarrier</subfield></datafield><datafield tag=\"500\" ind1=\" \" ind2=\" \"><subfield code=\"a\">Edited by Jonathan J. 
Davies and others.</subfield></datafield><datafield tag=\"650\" ind1=\" \" ind2=\"0\"><subfield code=\"a\">Business enterprises</subfield><subfield code=\"x\">Taxation</subfield><subfield code=\"z\">United States.</subfield><subfield code=\"0\">http://id.loc.gov/authorities/subjects/sh2009118044</subfield></datafield><datafield tag=\"650\" ind1=\" \" ind2=\"7\"><subfield code=\"a\">Business enterprises</subfield><subfield code=\"x\">Taxation.</subfield><subfield code=\"2\">fast</subfield><subfield code=\"0\">http://id.worldcat.org/fast/842634</subfield></datafield><datafield tag=\"651\" ind1=\" \" ind2=\"7\"><subfield code=\"a\">United States.</subfield><subfield code=\"2\">fast</subfield><subfield code=\"0\">http://id.worldcat.org/fast/1204155</subfield></datafield><datafield tag=\"650\" ind1=\" \" ind2=\"0\"><subfield code=\"a\">Tax planning</subfield><subfield code=\"z\">United States.</subfield><subfield code=\"0\">http://id.loc.gov/authorities/subjects/sh2008112546</subfield></datafield><datafield tag=\"650\" ind1=\" \" ind2=\"7\"><subfield code=\"a\">Tax planning.</subfield><subfield code=\"2\">fast</subfield><subfield code=\"0\">http://id.worldcat.org/fast/1143815</subfield></datafield><datafield tag=\"700\" ind1=\"1\" ind2=\" \"><subfield code=\"a\">Davies, Jonathan J.</subfield><subfield code=\"0\">http://id.loc.gov/authorities/names/n82126416</subfield></datafield><datafield tag=\"710\" ind1=\"2\" ind2=\" \"><subfield code=\"a\">Coopers &amp; Lybrand.</subfield><subfield code=\"0\">http://id.loc.gov/authorities/names/n79063025</subfield></datafield><datafield tag=\"740\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">Guide to business tax strategies and planning.</subfield></datafield><datafield tag=\"740\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">Business tax strategies and planning</subfield></datafield><datafield tag=\"740\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">Tax strategies and planning.</subfield></datafield><datafield tag=\"902\" ind1=\" \" 
ind2=\" \"><subfield code=\"a\">MARCIVE 2022</subfield></datafield><datafield tag=\"950\" ind1=\" \" ind2=\" \"><subfield code=\"l\">LIPP</subfield><subfield code=\"i\">11/30/89 C</subfield></datafield><datafield tag=\"955\" ind1=\" \" ind2=\" \"><subfield code=\"l\">LIPP</subfield><subfield code=\"c\">1</subfield><subfield code=\"q\">89-B7246-1</subfield><subfield code=\"r\">[01043 6272]</subfield><subfield code=\"i\">11/30/89 C</subfield></datafield></record>"],
10
+ "access_f_stored":["At the library"],
11
+ "format_f_stored":["Book"],
12
+ "author_creator_xfacet2_input":["nDavies, Jonathan J.","nCoopers & Lybrand"],
13
+ "subject_search":["Business enterprises Taxation United States. http://id.loc.gov/authorities/subjects/sh2009118044","Business enterprises Taxation. fast http://id.worldcat.org/fast/842634","United States. fast http://id.worldcat.org/fast/1204155","Tax planning United States. http://id.loc.gov/authorities/subjects/sh2008112546","Tax planning. fast http://id.worldcat.org/fast/1143815"],
14
+ "toplevel_subject_f":["Business enterprises","Business enterprises","United States","Tax planning","Tax planning"],
15
+ "call_number_xfacet":["{\"raw\":\"KF6450 .C59 1989\"}"],
16
+ "language_f_stored":["English"],
17
+ "language_search":["English"],
18
+ "library_f_stored":["LIBRA"],
19
+ "specific_location_f_stored":["LIBRA"],
20
+ "classification_f_stored":["K - Law"],
21
+ "title_1_search":["The Coopers & Lybrand guide to business tax strategies and planning /"],
22
+ "title_2_search":["The Coopers & Lybrand guide to business tax strategies and planning /","Guide to business tax strategies and planning.","Business tax strategies and planning","Tax strategies and planning."],
23
+ "author_creator_2_search":["Davies, Jonathan J. http://id.loc.gov/authorities/names/n82126416","Jonathan J. Davies http://id.loc.gov/authorities/names/n82126416","Coopers & Lybrand. http://id.loc.gov/authorities/names/n79063025","http://id.loc.gov/authorities/names/n79063025"],
24
+ "title":["The Coopers & Lybrand guide to business tax strategies and planning"],
25
+ "title_xfacet":["{\"raw\":{\"prefix\":\"The \",\"filing\":\"Coopers & Lybrand guide to business tax strategies and planning / \"}}"],
26
+ "title_nssort":["Coopers & Lybrand guide to business tax strategies and planning / The "],
27
+ "title_sort_tl":["Coopers & Lybrand guide to business tax strategies and planning / "],
28
+ "publication_a":["New York : Simon and Schuster, [1989] , ©1989"],
29
+ "elvl_rank_isort":[0],
30
+ "hld_count_isort":[1],
31
+ "itm_count_isort":[1],
32
+ "empty_hld_count_isort":[0],
33
+ "subject_xfacet2_input":["sBusiness enterprises--Taxation--United States","{\"val\":\"Business enterprises--Taxation\",\"prefix\":\"f\"}","{\"val\":\"United States\",\"prefix\":\"f\"}","sTax planning--United States","{\"val\":\"Tax planning\",\"prefix\":\"f\"}"],
34
+ "recently_added_isort":[1496742800],
35
+ "last_update_isort":[1654802397],
36
+ "publication_date_ssort":["1989"],
37
+ "pub_min_dtsort":["1989-01-01T00:00:00Z"],
38
+ "pub_max_dtsort":["1990-01-01T00:00:00Z"],
39
+ "content_min_dtsort":["1989-01-01T00:00:00Z"],
40
+ "content_max_dtsort":["1990-01-01T00:00:00Z"],
41
+ "publication_date_f_stored":["1980s"],
42
+ "publication_dr":["[1989 TO 1989]"],
43
+ "content_dr":["[1989 TO 1989]"],
44
+ "call_number_search":["KF6450 .C59 1989"],
45
+ "physical_holdings_json":["[{\"holding_id\":\"22418068440003681\",\"location\":\"stor\",\"classification_part\":\"KF6450\",\"item_part\":\".C59 1989\"}]"],
46
+ "corporate_author_search":["Coopers & Lybrand."],
47
+ "place_of_publication_search":["New York :"],
48
+ "publisher_search":["Simon and Schuster,"]
49
+ }
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ # MARC encoding level
4
+ # See: https://www.oclc.org/bibformats/en/fixedfield/elvl.html
5
+ # Not sure how this is used
6
module PennMARC
  # Encoding level codes plus a numeric ranking of those codes. In the ranking, the four "good" levels share
  # rank 0 and the remaining ranked levels receive increasing numbers.
  module EncodingLevel
    # Codes defined by the MARC leader specification (https://www.loc.gov/marc/bibliographic/bdleader.html)
    FULL = ' '
    FULL_NOT_EXAMINED = '1'
    UNFULL_NOT_EXAMINED = '2'
    ABBREVIATED = '3'
    CORE = '4'
    PRELIMINARY = '5'
    MINIMAL = '7'
    PREPUBLICATION = '8'
    UNKNOWN = 'u'
    NOT_APPLICABLE = 'z'

    # Codes added by OCLC (https://www.oclc.org/bibformats/en/fixedfield/elvl.html)
    OCLC_FULL = 'I'
    OCLC_MINIMAL = 'K'
    OCLC_BATCH_LEGACY = 'L'
    OCLC_BATCH = 'M'
    OCLC_SOURCE_DELETED = 'J'

    # Maps an encoding level code to its rank. The top four levels (per nelsonrr) are deliberately not
    # differentiated and all rank 0; the remaining ranked levels are numbered consecutively starting at 4.
    # PREPUBLICATION, UNKNOWN and NOT_APPLICABLE have no entry, so looking them up yields nil.
    RANK = [FULL, FULL_NOT_EXAMINED, OCLC_FULL, CORE]
           .to_h { |code| [code, 0] }
           .merge(
             [
               UNFULL_NOT_EXAMINED, ABBREVIATED, PRELIMINARY, MINIMAL,
               OCLC_MINIMAL, OCLC_BATCH, OCLC_BATCH_LEGACY, OCLC_SOURCE_DELETED
             ].each_with_index.to_h { |code, offset| [code, offset + 4] }
           ).freeze
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Constants for Alma's MARC enrichment
4
+ # MARC enrichment is performed during the Alma Publishing process
5
+ # @see https://developers.exlibrisgroup.com/alma/apis/docs/bibs/R0VUIC9hbG1hd3MvdjEvYmlicy97bW1zX2lkfQ==/
6
+ # Alma documentation for these added fields
7
module PennMARC
  # Tag and subfield code constants for the fields Alma adds to a record when it is published ("enriched").
  module EnrichedMarc
    # Tags of the added fields; the names follow the terminology on Alma's Publishing Profile screen
    TAG_HOLDING = 'hld'
    TAG_ITEM = 'itm'
    TAG_ELECTRONIC_INVENTORY = 'prt'
    TAG_DIGITAL_INVENTORY = 'dig'

    # Holding subfields: these are 852 subfield codes, named after the MARC spec
    SUB_HOLDING_SHELVING_LOCATION = 'c'
    SUB_HOLDING_SEQUENCE_NUMBER = '8'
    SUB_HOLDING_CLASSIFICATION_PART = 'h'
    SUB_HOLDING_ITEM_PART = 'i'

    # Item subfields
    SUB_ITEM_CURRENT_LOCATION = 'g'
    SUB_ITEM_CALL_NUMBER_TYPE = 'h'
    SUB_ITEM_CALL_NUMBER = 'i'
    SUB_ITEM_DATE_CREATED = 'q'

    # Electronic inventory (portfolio) subfields
    SUB_ELEC_PORTFOLIO_PID = 'a'
    SUB_ELEC_ACCESS_URL = 'b'
    SUB_ELEC_COLLECTION_NAME = 'c'
    SUB_ELEC_COVERAGE = 'g'

    # Subfield added during preprocessing to hold boundwith record IDs. The code is one that the MARC 21 spec
    # does NOT use for 852 holdings records.
    # TODO: evaluate this in context of changed boundwiths processing
    SUB_BOUND_WITH_ID = 'y'
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module PennMARC
  # Values shared by the subject and genre helpers to decide which headings are included
  module HeadingControl
    # Source codes whose headings we are willing to display. A code is expected in sf2 when indicator2 is 7
    # ("source specified"); headings from sources not listed here are not wanted for display.
    ALLOWED_SOURCE_CODES = %w[
      aat cct fast ftamc gmgpc gsafd
      homoit jlabsh lcgft lcsh lcstt lctgm
      local/osu mesh ndlsh nlksh rbbin rbgenr
      rbmscv rbpap rbpri rbprov rbpub rbtyp
    ].freeze
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
module PennMARC
  # Helper for the citation note fields 510 (Citation/References) and 524 (Preferred Citation)
  class Citation < Helper
    # Values from field 510, which records citations or references to published bibliographic descriptions,
    # reviews, abstracts, or indexes of the content of the described item, i.e. where the item has been cited or
    # reviewed. Citations may be given in brief form (generally recognizable abbreviations, etc.). The text of a
    # published description itself belongs in field 520 (Summary, Etc. Note), not here.
    # https://www.loc.gov/marc/bibliographic/bd510.html
    # @param [MARC::Record] record
    # @return [Array] array of citations and any linked alternates
    def self.cited_in_show(record)
      datafield_and_linked_alternate(record, '510')
    end

    # Values from field 524, the Preferred Citation of Described Materials Note: the citation format preferred by
    # the custodian of the materials. Each alternative citation format for an item is recorded in its own 524.
    # The note is sometimes displayed or printed with an introductory phrase generated from the first indicator.
    # https://www.loc.gov/marc/bibliographic/bd524.html
    # @param [MARC::Record] record
    # @return [Array] array of citation of described materials note and any linked alternates
    def self.cite_as_show(record)
      datafield_and_linked_alternate(record, '524')
    end
  end
end
@@ -0,0 +1,237 @@
1
+ # frozen_string_literal: true
2
+
3
+ module PennMARC
4
+   # Do Creator & Author field processing. Main methods pull from 100 and 110 fields. Display methods here no longer
5
+ # return data structures intended for generating "search" links, but some of the split subfield parsing remains from
6
+ # ported methods in case we need to replicate that functionality.
7
+ # @todo can there ever be multiple 100 fields?
8
+ # can ǂe and ǂ4 both be used at the same time? seems to result in duplicate values
9
+ class Creator < Helper
10
+ class << self
11
+ # Main tags for Author/Creator information
12
+ TAGS = %w[100 110].freeze
13
+ # Aux tags for Author/Creator information, for use in search_aux method
14
+ AUX_TAGS = %w[100 110 111 400 410 411 700 710 711 800 810 811].freeze
15
+
16
# Author/Creator search field. Includes all subfield values (even ǂ0 URIs) from
# {https://www.oclc.org/bibformats/en/1xx/100.html 100 Main Entry--Personal Name} and
# {https://www.oclc.org/bibformats/en/1xx/110.html 110 Main Entry--Corporate Name}. Maps any relator codes found
# in ǂ4. To better handle name searches, each field is indexed twice: once with ǂa reordered by
# convert_name_order ("First Last") and once with ǂa as found ("Last, First"). Values that do not already end in
# '.' or '-' get a trailing period. Also indexes any linked values in the 880. Some of the search fields remain
# incomplete and may need to be further investigated and ported when search result relevancy is considered.
# @todo this seems bad - why include relator labels? URIs? punctuation? leaving mostly as-is for now,
#   but this should be reexamined in the relevancy-tuning phase. URIs should def be removed. and shouldn't
#   indicator1 tell us the order of the name?
# @note ported from get_author_creator_1_search_values
# @param [MARC::Record] record
# @param [Hash] relator_mapping
# @return [Array<String>] array of author/creator values for indexing, without duplicates
def search(record, relator_mapping)
  # Builds one search value from a main entry field. With reorder_name the ǂa value is passed through
  # convert_name_order and ǂ1 is dropped; without it ǂa and ǂ1 are used as found. ǂ6 and ǂ8 are always dropped
  # and ǂ4 is always replaced by its mapped relator (or dropped when the mapping yields nothing).
  entry_for = lambda do |field, reorder_name|
    skipped_codes = reorder_name ? %w[1 6 8] : %w[6 8]
    pieces = field.filter_map do |sf|
      if sf.code == '4'
        relator = translate_relator(sf.value, relator_mapping)
        relator unless relator.blank?
      elsif skipped_codes.include?(sf.code)
        nil
      elsif reorder_name && sf.code == 'a'
        convert_name_order(sf.value)
      else
        sf.value
      end
    end
    value = join_and_squish(pieces)
    value.end_with?('.', '-') ? value : "#{value}."
  end

  main_fields = record.fields(TAGS)
  acc = main_fields.map { |field| entry_for.call(field, true) }
  # the second pass over the same fields produces name entries with the names not reordered
  acc += main_fields.map { |field| entry_for.call(field, false) }

  acc += record.fields('880').filter_map do |field|
    # ǂ6 of an 880 starts with the linked tag and continues with an occurrence number etc. ("100-01/..."), so
    # compare by prefix; an exact comparison against the bare tag never matches such a value
    next unless field.any? { |sf| sf.code == '6' && sf.value.to_s.start_with?(*TAGS) }

    first_a = field.find(&subfield_in?(%w[a]))
    suba = first_a && convert_name_order(first_a.value)
    oth = join_and_squish(field.find_all(&subfield_not_in?(%w[6 8 a t])).map(&:value))
    join_and_squish [suba, oth]
  end
  acc.uniq
end
80
+
81
# Auxiliary Author/Creator search field. Placeholder: the legacy method has not been ported yet, so this
# currently returns nil for every record.
# @note ported from get_author_creator_2_search_values
# @todo port this later
# @param [MARC::Record] record
# @return [Array<String>] array of extended author/creator values for indexing (once implemented)
def search_aux(record)
  # intentionally empty until the legacy logic is ported
end
87
+
88
# All author/creator values for display (like #show, but multivalued?) - no 880 linkage. Produces one value
# per main entry field via name_from_main_entry.
# @note ported from get_author_creator_values (indexed as author_creator_a) - shown on results page
# @param [MARC::Record] record
# @param [Hash] relator_mapping
# @return [Array<String>] array of author/creator values for display
def values(record, relator_mapping)
  main_entries = record.fields(TAGS)
  main_entries.map { |entry| name_from_main_entry(entry, relator_mapping) }
end
98
+
99
# Author/Creator values for display. Takes the main entry fields followed by any 880 field whose ǂ6 points
# at one of them, and joins each field's subfields other than 0, 1, 4, 6, 8, e and w.
# @todo ported from get_author_display - used on record show page. porting did not include 4, e or w values,
#   which were part of the link object as 'append' values in franklin
# @param [MARC::Record] record
# @return [Array<String>] array of author/creator values for display
def show(record)
  main_entries = record.fields(TAGS)
  linked_alternates = record.fields('880').select { |alternate| subfield_value_in?(alternate, '6', TAGS) }
  display_subfield = subfield_not_in?(%w[0 1 4 6 8 e w])
  (main_entries + linked_alternates).filter_map { |entry| join_subfields(entry, &display_subfield) }
end
111
+
112
# Author/Creator sort value, built from the first main entry field only. Relator codes are neither mapped nor
# included.
# @todo This includes any URI from ǂ0 which could help to disambiguate in sorts, but ǂ1 is excluded...
# @note ported from get_author_creator_sort_values
# @param [MARC::Record] record
# @return [String] string with author/creator value for sorting
def sort(record)
  # NOTE(review): when the record has no main entry field this is nil and is handed to join_subfields as-is -
  # confirm that helper tolerates a nil field
  main_entry = record.fields(TAGS).first
  sortable_subfield = subfield_not_in?(%w[1 4 6 8 e])
  join_subfields(main_entry, &sortable_subfield)
end
122
+
123
# Author/Creator for faceting. Walks the x00/x10/x11 tags of the 1xx, 7xx and 8xx ranges in a fixed order,
# joins the subfields defined for each tag, then trims some punctuation from the joined value
# (@see trim_punctuation)
# @todo should trim_punctuation apply to each subfield value, or the joined values? i think the joined values
# @note ported from author_creator_xfacet2_input - is this the best choice? check the copyField declarations -
#       franklin uses author_creator_f
# @param [MARC::Record] record
# @return [Array<String>] array of author/creator values for faceting
def facet(record)
  name_codes = %w[a b c d j q]
  meeting_codes = %w[a b c e n]
  codes_by_tag = {
    '100' => name_codes, '110' => name_codes, '111' => meeting_codes,
    '700' => name_codes, '710' => name_codes, '711' => meeting_codes,
    '800' => name_codes, '810' => name_codes, '811' => meeting_codes
  }
  codes_by_tag.flat_map do |tag, codes|
    record.fields(tag).map { |field| trim_punctuation(join_subfields(field, &subfield_in?(codes))) }
  end
end
142
+
143
# Conference for display, intended for results display. One value per 111 field, built by
# name_from_main_entry.
# @note ported from get_conference_values
# @param [MARC::Record] record
# @param [Hash] relator_map
# @return [Array<String>] array of conference values
def conference_show(record, relator_map)
  record.fields('111').filter_map { |meeting| name_from_main_entry(meeting, relator_map) }
end
153
+
154
# Conference detailed display, intended for record show page. Values from 111/711 fields come first, followed
# by values from 880 fields whose ǂ6 points at 111/711.
# @note ported from get_conference_values
# @todo what is ǂi for?
# @param [MARC::Record] record
# @return [Array<String>] array of conference values
def conference_detail_show(record)
  name_part = subfield_not_in?(%w[0 4 5 6 8 e j w])

  native_values = record.fields(%w[111 711]).filter_map do |field|
    # only fields with an empty or blank second indicator are shown
    next unless ['', ' '].include?(field.indicator2)

    # the name portion is left out when the field carries a ǂi
    conf = subfield_undefined?(field, 'i') ? join_subfields(field, &name_part) : ''
    conf_extra = join_subfields(field, &subfield_in?(%w[e j w]))
    join_and_squish [conf, conf_extra].compact_blank
  end

  linked_values = record.fields('880').filter_map do |field|
    next unless subfield_value_in? field, '6', %w[111 711]
    # linked fields carrying a ǂi are skipped entirely
    next if subfield_defined? field, 'i'

    conf = join_subfields(field, &name_part)
    # unlike the 111/711 case above, ǂ4 is part of the trailing portion here
    conf_extra = join_subfields(field, &subfield_in?(%w[4 e j w]))
    join_and_squish [conf, conf_extra]
  end

  native_values + linked_values
end
181
+
182
# Placeholder for the "Conference" fielded search values; currently returns nil for every record.
# @todo this supports "Conference" fielded search and may not be needed
# @note see get_conference_search_values
# @param [MARC::Record] record
def conference_search(record)
  # intentionally empty - not ported
end
185
+
186
+ private
187
+
188
# Trim punctuation method extracted from Traject macro, to ensure consistent output. A nil (or false) argument
# is handed back untouched. Otherwise three trims are applied in order, followed by a whitespace strip:
#   1. one trailing space, comma, slash, semicolon or colon, together with the spaces around it
#   2. a trailing period, but only when at least three word characters come directly before it
#   3. a leading and/or trailing square bracket, provided the text between contains no square brackets
# @todo move to Util?
# @param [String] string
# @return [String] string with relevant punctuation removed
def trim_punctuation(string)
  return string unless string

  string
    .sub(%r{ *[ ,/;:] *\Z}, '')
    .sub(/( *[[:word:]]{3,})\. *\Z/, '\1')
    .sub(/\A\[?([^\[\]]+)\]?\Z/, '\1')
    .strip
end
207
+
208
# Extract the information we care about from 1xx fields, map relator codes, and use appropriate punctuation.
# Subfields 0, 1, 6 and 8 are left out; ǂ4 is replaced by ", <relator>" when the code maps to something; every
# other subfield value is appended after a space. A period is added unless the text ends in '.' or '-'.
# @param [MARC::Field] field
# @param [Hash] mapping relator mapping handed to translate_relator
# @return [String] joined subfield values for value from field
def name_from_main_entry(field, mapping)
  parts = field.filter_map do |sf|
    if sf.code == '4'
      relator = translate_relator(sf.value, mapping)
      ", #{relator}" unless relator.blank?
    elsif %w[0 1 6 8].exclude?(sf.code)
      " #{sf.value}"
    end
  end
  name = parts.join
  name += '.' unless %w[. -].member?(name.last)
  name.squish
end
224
+
225
# Convert "Lastname, First" to "First Lastname". A name without a comma is returned unchanged.
# @param [String] name value for processing
# @return [String]
def convert_name_order(name)
  return name unless name.include?(',')

  forename = join_and_squish([trim_trailing(:comma, substring_after(name, ', '))])
  surname = substring_before(name, ', ')
  [forename, ' ', surname].join.squish
end
235
+ end
236
+ end
237
+ end
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ module PennMARC
4
+ # Parses Database Subject Category and Database Type local fields
5
+ class Database < Helper
6
+ # Database format type used to facet databases, found in
7
+ # {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
8
+ # local field 944} subfield 'a'.
9
+ DATABASES_FACET_VALUE = 'Database & Article Index'
10
+ # Penn Libraries' Community of Interest code used in
11
+ # {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
12
+ # local field 943} subfield '2'.
13
+ COI_CODE = 'penncoi'
14
+
15
+ class << self
16
# Retrieves database subtype (subfield 'b') from
# {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
# local field 944}. A 944 field contributes its subtype only if Penn's Database facet value is present in its
# subfield 'a'; fields without a subfield 'b' contribute nothing.
# @param [MARC::Record] record
# @return [Array<String>] Array of types
def type(record)
  database_fields = record.fields('944').select do |field|
    subfield_value?(field, 'a', /#{DATABASES_FACET_VALUE}/)
  end
  database_fields.filter_map do |field|
    field.find { |subfield| subfield.code == 'b' }&.value
  end
end
30
+
31
# Retrieves database subject category/communities of interest (subfield 'a') from
# {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
# local field 943}. Returns nothing unless the record is a curated database (see curated_db?), and a 943 field
# contributes its category only if Penn's Community of Interest code is present in its subfield '2'.
# @param [MARC::Record] record
# @return [Array<String>] Array of categories
def db_category(record)
  return [] unless curated_db?(record)

  coi_fields = record.fields('943').select { |field| subfield_value?(field, '2', /#{COI_CODE}/) }
  coi_fields.filter_map do |field|
    field.find { |subfield| subfield.code == 'a' }&.value
  end
end
48
+
49
# Concatenates database subject category with database sub subject category in the format "category--subcategory"
# if both values are present.
# Retrieves both values respectively from subfield 'a' and subfield 'b' of
# {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
# local field 943}. Only returns subcategory if Penn's Community of Interest code is present in subfield '2'.
# Returns nothing unless the record is a curated database (see curated_db?).
# @note return value differs from legacy implementation. This version only returns ["category--subcategory"] or
#   an empty array.
# @param [MARC::Record] record
# @return [Array<String>] Array of "category--subcategory"
def db_subcategory(record)
  return [] unless curated_db?(record)

  record.fields('943').filter_map do |field|
    # skip unless Community of Interest code is in subfield '2'
    next unless subfield_value?(field, '2', /#{COI_CODE}/)

    category = field.find { |subfield| subfield.code == 'a' }&.value
    subcategory = field.find { |subfield| subfield.code == 'b' }&.value

    # test the subfield VALUES: a subfield that exists but is empty must not produce "category--" or
    # "--subcategory"
    next if category.blank? || subcategory.blank?

    "#{category}--#{subcategory}"
  end
end
78
+
79
+ private
80
+
81
# Determines whether the record is a curated database, i.e. whether any 944 field has the format type used to
# facet databases in its subfield 'a'.
# @param [MARC::Record] record
# @return [TrueClass, FalseClass]
def curated_db?(record)
  facet_pattern = /#{DATABASES_FACET_VALUE}/
  record.fields('944').any? { |field| subfield_value?(field, 'a', facet_pattern) }
end
87
+ end
88
+ end
89
+ end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ module PennMARC
4
+ # Parser methods for extracting date info as DateTime objects
5
+ class Date < Helper
6
+ class << self
7
# Retrieve publication date (Date 1) from {https://www.loc.gov/marc/bibliographic/bd008a.html 008 field}.
# Publication date is a four-digit year found in position 7-10 and may contain 'u' characters to represent
# partially known dates. Any 'u' is replaced with '0' before the year is converted to a DateTime object. The
# value from the first 008 field that yields a usable year is returned.
# @param [MARC::Record] record
# @return [DateTime, nil] The publication date, or nil if date found in record is invalid
def publication(record)
  dates = record.fields('008').filter_map do |control_field|
    year = sanitize_partially_known_date(control_field.value[7, 4], '0')
    DateTime.new(year.to_i) if year.present?
  end
  dates.first
end
21
+
22
# Retrieve date added (subfield 'q') from enriched marc 'itm' field.
# {PennMARC::EnrichedMarc} maps enriched marc fields and subfields created during Alma publishing.
# When several item fields carry a date, the most recent one is returned. A date that cannot be parsed is
# reported on standard output and ignored.
# @param [MARC::Record] record
# @return [DateTime, nil] The date added, or nil if date found in record is invalid
def added(record)
  dates = record.fields(EnrichedMarc::TAG_ITEM).flat_map do |field|
    field.filter_map do |subfield|
      # only the date created subfield holds a date; attempting to parse the item's other subfields would
      # report a spurious error for each of them and could pick up a date-like value that is not the date added
      next unless subfield.code == EnrichedMarc::SUB_ITEM_DATE_CREATED

      # On 2022-05-02, this field value (as exported in enriched publishing
      # job from Alma) began truncating time to day-level granularity. We have
      # no guarantee that this won't switch back in the future, so for the
      # foreseeable future we should support both formats.
      format = subfield.value.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'

      DateTime.strptime(subfield.value, format)
    rescue StandardError => e
      puts "Error parsing date in date added subfield: #{subfield.value} - #{e}"
      nil
    end
  end
  dates.max
end
47
+
48
# Retrieve date last updated from {https://www.loc.gov/marc/bibliographic/bd005.html 005 field}.
# Date last updated is a sixteen character String recorded in
# {https://www.iso.org/iso-8601-date-and-time-format.html ISO 8601} format. Blank values and values starting
# with '0000' are skipped; a value that cannot be parsed is reported on standard output and skipped. The first
# remaining value is returned.
# @param [MARC::Record] record
# @return [DateTime, nil] The date last updated, or nil if date found in record is invalid
def last_updated(record)
  parsed = record.fields('005').filter_map do |control_field|
    stamp = control_field.value
    next if stamp.blank? || stamp.start_with?('0000')

    DateTime.iso8601(stamp)
  rescue ArgumentError => e
    puts "Error parsing last updated date: #{stamp} - #{e}"
    nil
  end
  parsed.first
end
68
+
69
+ private
70
+
71
# Sanitizes a partially known date string by replacing any 'u' occurrences with a specified replacement value.
# The whole string must consist of digits followed by 'u' characters; anything else (including nil and strings
# containing a line break) is rejected.
# @param [String] date The date string in '%Y' format, potentially containing 'u' characters.
# @param [String] replacement The value with which to replace 'u' occurrences in the date string.
# @return [String, nil] The sanitized date string with 'u' characters replaced by the replacement value,
#   or nil if the date string does not match the expected format.
def sanitize_partially_known_date(date, replacement)
  # \A and \z anchor the whole string; ^ and $ only anchor a line, which would accept a multi-line value as
  # long as one of its lines matched
  return unless /\A[0-9]*u*\z/.match?(date)

  # replace 'u' occurrences with the specified replacement value
  date.gsub('u', replacement)
end
83
+ end
84
+ end
85
+ end