pennmarc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +23 -0
  6. data/Gemfile.lock +119 -0
  7. data/README.md +82 -0
  8. data/legacy/indexer.rb +568 -0
  9. data/legacy/marc.rb +2964 -0
  10. data/legacy/test_file_output.json +49 -0
  11. data/lib/pennmarc/encoding_level.rb +43 -0
  12. data/lib/pennmarc/enriched_marc.rb +36 -0
  13. data/lib/pennmarc/heading_control.rb +11 -0
  14. data/lib/pennmarc/helpers/citation.rb +31 -0
  15. data/lib/pennmarc/helpers/creator.rb +237 -0
  16. data/lib/pennmarc/helpers/database.rb +89 -0
  17. data/lib/pennmarc/helpers/date.rb +85 -0
  18. data/lib/pennmarc/helpers/edition.rb +90 -0
  19. data/lib/pennmarc/helpers/format.rb +312 -0
  20. data/lib/pennmarc/helpers/genre.rb +71 -0
  21. data/lib/pennmarc/helpers/helper.rb +11 -0
  22. data/lib/pennmarc/helpers/identifier.rb +134 -0
  23. data/lib/pennmarc/helpers/language.rb +37 -0
  24. data/lib/pennmarc/helpers/link.rb +12 -0
  25. data/lib/pennmarc/helpers/location.rb +97 -0
  26. data/lib/pennmarc/helpers/note.rb +132 -0
  27. data/lib/pennmarc/helpers/production.rb +131 -0
  28. data/lib/pennmarc/helpers/relation.rb +135 -0
  29. data/lib/pennmarc/helpers/series.rb +118 -0
  30. data/lib/pennmarc/helpers/subject.rb +304 -0
  31. data/lib/pennmarc/helpers/title.rb +197 -0
  32. data/lib/pennmarc/mappings/language.yml +516 -0
  33. data/lib/pennmarc/mappings/locations.yml +1801 -0
  34. data/lib/pennmarc/mappings/relator.yml +263 -0
  35. data/lib/pennmarc/parser.rb +177 -0
  36. data/lib/pennmarc/util.rb +240 -0
  37. data/lib/pennmarc.rb +6 -0
  38. data/pennmarc.gemspec +22 -0
  39. data/spec/fixtures/marcxml/test.xml +167 -0
  40. data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
  41. data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
  42. data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
  43. data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
  44. data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
  45. data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
  46. data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
  47. data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
  48. data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
  49. data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
  50. data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
  51. data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
  52. data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
  53. data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
  54. data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
  55. data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
  56. data/spec/lib/pennmarc/parser_spec.rb +13 -0
  57. data/spec/spec_helper.rb +104 -0
  58. data/spec/support/marc_spec_helpers.rb +84 -0
  59. metadata +171 -0
@@ -0,0 +1,49 @@
1
+ { "id":["FRANKLIN_9910148543503681"],
2
+ "grouped_id":["18521155!FRANKLIN_9910148543503681"],
3
+ "record_source_id":[1],
4
+ "record_source_f":["Penn"],
5
+ "nocirc_f_stored":["none"],
6
+ "alma_mms_id":["9910148543503681"],
7
+ "oclc_id":["18521155"],
8
+ "cluster_id":["18521155"],
9
+ "marcrecord_xml_stored_single_large":["<record><leader>01908cam a2200469 a 4500</leader><controlfield tag=\"005\">20220609191957.0</controlfield><controlfield tag=\"008\">890224t19891989nyua 001 0 eng </controlfield><controlfield tag=\"001\">9910148543503681</controlfield><datafield tag=\"010\" ind1=\" \" ind2=\" \"><subfield code=\"a\"> 88029825 </subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(OCoLC)ocm18521155</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(OCoLC)18521155</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(CStRLIN)PAUG89-B7246</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(CaOTULAS)185188489</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"9\">AHA6856</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">1014854</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(PU)1014854-penndb-Voyager</subfield></datafield><datafield tag=\"040\" ind1=\" \" ind2=\" \"><subfield code=\"b\">eng</subfield><subfield code=\"d\">CSt-B</subfield></datafield><datafield tag=\"043\" ind1=\" \" ind2=\" \"><subfield code=\"a\">n-us---</subfield></datafield><datafield tag=\"050\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">KF6450</subfield><subfield code=\"b\">.C59 1989</subfield></datafield><datafield tag=\"082\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">343.7306/8</subfield><subfield code=\"a\">347.30368</subfield><subfield code=\"2\">19</subfield></datafield><datafield tag=\"090\" ind1=\" \" ind2=\" \"><subfield code=\"a\">KF6450</subfield><subfield code=\"b\">.C59 1989</subfield><subfield code=\"i\">11/30/89 CTZ</subfield></datafield><datafield tag=\"245\" ind1=\"0\" ind2=\"4\"><subfield code=\"a\">The Coopers &amp; Lybrand guide to business tax strategies and planning /</subfield><subfield code=\"c\">by 
the partners of Coopers &amp; Lybrand.</subfield></datafield><datafield tag=\"264\" ind1=\" \" ind2=\"1\"><subfield code=\"a\">New York :</subfield><subfield code=\"b\">Simon and Schuster,</subfield><subfield code=\"c\">[1989]</subfield></datafield><datafield tag=\"264\" ind1=\" \" ind2=\"4\"><subfield code=\"c\">©1989</subfield></datafield><datafield tag=\"300\" ind1=\" \" ind2=\" \"><subfield code=\"a\">x, 198 pages :</subfield><subfield code=\"b\">illustrations ;</subfield><subfield code=\"c\">24 cm</subfield></datafield><datafield tag=\"336\" ind1=\" \" ind2=\" \"><subfield code=\"a\">text</subfield><subfield code=\"b\">txt</subfield><subfield code=\"2\">rdacontent</subfield></datafield><datafield tag=\"337\" ind1=\" \" ind2=\" \"><subfield code=\"a\">unmediated</subfield><subfield code=\"b\">n</subfield><subfield code=\"2\">rdamedia</subfield></datafield><datafield tag=\"338\" ind1=\" \" ind2=\" \"><subfield code=\"a\">volume</subfield><subfield code=\"b\">nc</subfield><subfield code=\"2\">rdacarrier</subfield></datafield><datafield tag=\"500\" ind1=\" \" ind2=\" \"><subfield code=\"a\">Edited by Jonathan J. 
Davies and others.</subfield></datafield><datafield tag=\"650\" ind1=\" \" ind2=\"0\"><subfield code=\"a\">Business enterprises</subfield><subfield code=\"x\">Taxation</subfield><subfield code=\"z\">United States.</subfield><subfield code=\"0\">http://id.loc.gov/authorities/subjects/sh2009118044</subfield></datafield><datafield tag=\"650\" ind1=\" \" ind2=\"7\"><subfield code=\"a\">Business enterprises</subfield><subfield code=\"x\">Taxation.</subfield><subfield code=\"2\">fast</subfield><subfield code=\"0\">http://id.worldcat.org/fast/842634</subfield></datafield><datafield tag=\"651\" ind1=\" \" ind2=\"7\"><subfield code=\"a\">United States.</subfield><subfield code=\"2\">fast</subfield><subfield code=\"0\">http://id.worldcat.org/fast/1204155</subfield></datafield><datafield tag=\"650\" ind1=\" \" ind2=\"0\"><subfield code=\"a\">Tax planning</subfield><subfield code=\"z\">United States.</subfield><subfield code=\"0\">http://id.loc.gov/authorities/subjects/sh2008112546</subfield></datafield><datafield tag=\"650\" ind1=\" \" ind2=\"7\"><subfield code=\"a\">Tax planning.</subfield><subfield code=\"2\">fast</subfield><subfield code=\"0\">http://id.worldcat.org/fast/1143815</subfield></datafield><datafield tag=\"700\" ind1=\"1\" ind2=\" \"><subfield code=\"a\">Davies, Jonathan J.</subfield><subfield code=\"0\">http://id.loc.gov/authorities/names/n82126416</subfield></datafield><datafield tag=\"710\" ind1=\"2\" ind2=\" \"><subfield code=\"a\">Coopers &amp; Lybrand.</subfield><subfield code=\"0\">http://id.loc.gov/authorities/names/n79063025</subfield></datafield><datafield tag=\"740\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">Guide to business tax strategies and planning.</subfield></datafield><datafield tag=\"740\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">Business tax strategies and planning</subfield></datafield><datafield tag=\"740\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">Tax strategies and planning.</subfield></datafield><datafield tag=\"902\" ind1=\" \" 
ind2=\" \"><subfield code=\"a\">MARCIVE 2022</subfield></datafield><datafield tag=\"950\" ind1=\" \" ind2=\" \"><subfield code=\"l\">LIPP</subfield><subfield code=\"i\">11/30/89 C</subfield></datafield><datafield tag=\"955\" ind1=\" \" ind2=\" \"><subfield code=\"l\">LIPP</subfield><subfield code=\"c\">1</subfield><subfield code=\"q\">89-B7246-1</subfield><subfield code=\"r\">[01043 6272]</subfield><subfield code=\"i\">11/30/89 C</subfield></datafield></record>"],
10
+ "access_f_stored":["At the library"],
11
+ "format_f_stored":["Book"],
12
+ "author_creator_xfacet2_input":["nDavies, Jonathan J.","nCoopers & Lybrand"],
13
+ "subject_search":["Business enterprises Taxation United States. http://id.loc.gov/authorities/subjects/sh2009118044","Business enterprises Taxation. fast http://id.worldcat.org/fast/842634","United States. fast http://id.worldcat.org/fast/1204155","Tax planning United States. http://id.loc.gov/authorities/subjects/sh2008112546","Tax planning. fast http://id.worldcat.org/fast/1143815"],
14
+ "toplevel_subject_f":["Business enterprises","Business enterprises","United States","Tax planning","Tax planning"],
15
+ "call_number_xfacet":["{\"raw\":\"KF6450 .C59 1989\"}"],
16
+ "language_f_stored":["English"],
17
+ "language_search":["English"],
18
+ "library_f_stored":["LIBRA"],
19
+ "specific_location_f_stored":["LIBRA"],
20
+ "classification_f_stored":["K - Law"],
21
+ "title_1_search":["The Coopers & Lybrand guide to business tax strategies and planning /"],
22
+ "title_2_search":["The Coopers & Lybrand guide to business tax strategies and planning /","Guide to business tax strategies and planning.","Business tax strategies and planning","Tax strategies and planning."],
23
+ "author_creator_2_search":["Davies, Jonathan J. http://id.loc.gov/authorities/names/n82126416","Jonathan J. Davies http://id.loc.gov/authorities/names/n82126416","Coopers & Lybrand. http://id.loc.gov/authorities/names/n79063025","http://id.loc.gov/authorities/names/n79063025"],
24
+ "title":["The Coopers & Lybrand guide to business tax strategies and planning"],
25
+ "title_xfacet":["{\"raw\":{\"prefix\":\"The \",\"filing\":\"Coopers & Lybrand guide to business tax strategies and planning / \"}}"],
26
+ "title_nssort":["Coopers & Lybrand guide to business tax strategies and planning / The "],
27
+ "title_sort_tl":["Coopers & Lybrand guide to business tax strategies and planning / "],
28
+ "publication_a":["New York : Simon and Schuster, [1989] , ©1989"],
29
+ "elvl_rank_isort":[0],
30
+ "hld_count_isort":[1],
31
+ "itm_count_isort":[1],
32
+ "empty_hld_count_isort":[0],
33
+ "subject_xfacet2_input":["sBusiness enterprises--Taxation--United States","{\"val\":\"Business enterprises--Taxation\",\"prefix\":\"f\"}","{\"val\":\"United States\",\"prefix\":\"f\"}","sTax planning--United States","{\"val\":\"Tax planning\",\"prefix\":\"f\"}"],
34
+ "recently_added_isort":[1496742800],
35
+ "last_update_isort":[1654802397],
36
+ "publication_date_ssort":["1989"],
37
+ "pub_min_dtsort":["1989-01-01T00:00:00Z"],
38
+ "pub_max_dtsort":["1990-01-01T00:00:00Z"],
39
+ "content_min_dtsort":["1989-01-01T00:00:00Z"],
40
+ "content_max_dtsort":["1990-01-01T00:00:00Z"],
41
+ "publication_date_f_stored":["1980s"],
42
+ "publication_dr":["[1989 TO 1989]"],
43
+ "content_dr":["[1989 TO 1989]"],
44
+ "call_number_search":["KF6450 .C59 1989"],
45
+ "physical_holdings_json":["[{\"holding_id\":\"22418068440003681\",\"location\":\"stor\",\"classification_part\":\"KF6450\",\"item_part\":\".C59 1989\"}]"],
46
+ "corporate_author_search":["Coopers & Lybrand."],
47
+ "place_of_publication_search":["New York :"],
48
+ "publisher_search":["Simon and Schuster,"]
49
+ }
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ # MARC encoding level
4
+ # See: https://www.oclc.org/bibformats/en/fixedfield/elvl.html
5
+ # Not sure how this is used
6
module PennMARC
  # Encoding level codes and a relative ranking of them.
  # Code lists: https://www.loc.gov/marc/bibliographic/bdleader.html (MARC) and
  # https://www.oclc.org/bibformats/en/fixedfield/elvl.html (OCLC extensions).
  module EncodingLevel
    # -- Official MARC codes --
    FULL = ' '
    FULL_NOT_EXAMINED = '1'
    UNFULL_NOT_EXAMINED = '2'
    ABBREVIATED = '3'
    CORE = '4'
    PRELIMINARY = '5'
    MINIMAL = '7'
    PREPUBLICATION = '8'
    UNKNOWN = 'u'
    NOT_APPLICABLE = 'z'

    # -- OCLC extension codes --
    OCLC_FULL = 'I'
    OCLC_MINIMAL = 'K'
    OCLC_BATCH_LEGACY = 'L'
    OCLC_BATCH = 'M'
    OCLC_SOURCE_DELETED = 'J'

    # Maps an encoding level code to an Integer rank. The first four codes (per nelsonrr) all share rank 0 so
    # that "good" records are not differentiated from one another; the remaining codes are ranked 4 through 11.
    # PREPUBLICATION, UNKNOWN and NOT_APPLICABLE have no entry.
    RANK = [FULL, FULL_NOT_EXAMINED, OCLC_FULL, CORE].to_h { |code| [code, 0] }.merge(
      UNFULL_NOT_EXAMINED => 4,
      ABBREVIATED => 5,
      PRELIMINARY => 6,
      MINIMAL => 7,
      OCLC_MINIMAL => 8,
      OCLC_BATCH => 9,
      OCLC_BATCH_LEGACY => 10,
      OCLC_SOURCE_DELETED => 11
    ).freeze
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Constants for Alma's MARC enrichment
4
+ # MARC enrichment is performed during the Alma Publishing process
5
+ # @see https://developers.exlibrisgroup.com/alma/apis/docs/bibs/R0VUIC9hbG1hd3MvdjEvYmlicy97bW1zX2lkfQ==/
6
+ # Alma documentation for these added fields
7
module PennMARC
  # Tag and subfield-code constants for the fields Alma adds to MARC records during enrichment.
  module EnrichedMarc
    # Enrichment field tags; names follow the terminology of the Publishing Profile screen.
    TAG_HOLDING = 'hld'
    TAG_ITEM = 'itm'
    TAG_ELECTRONIC_INVENTORY = 'prt'
    TAG_DIGITAL_INVENTORY = 'dig'

    # Holding subfields. These are 852 subfield codes; terminology comes from the MARC spec.
    SUB_HOLDING_SHELVING_LOCATION = 'c'
    SUB_HOLDING_SEQUENCE_NUMBER = '8'
    SUB_HOLDING_CLASSIFICATION_PART = 'h'
    SUB_HOLDING_ITEM_PART = 'i'

    # Item subfields.
    SUB_ITEM_CURRENT_LOCATION = 'g'
    SUB_ITEM_CALL_NUMBER_TYPE = 'h'
    SUB_ITEM_CALL_NUMBER = 'i'
    SUB_ITEM_DATE_CREATED = 'q'

    # Electronic inventory (portfolio) subfields.
    SUB_ELEC_PORTFOLIO_PID = 'a'
    SUB_ELEC_ACCESS_URL = 'b'
    SUB_ELEC_COLLECTION_NAME = 'c'
    SUB_ELEC_COVERAGE = 'g'

    # Boundwith record IDs are stored in a subfield code that the MARC 21 spec does NOT use for 852 holdings
    # records; the subfield is added during preprocessing.
    # TODO: evaluate this in context of changed boundwiths processing
    SUB_BOUND_WITH_ID = 'y'
  end
end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
module PennMARC
  # Shared values for controlling inclusion of subject or genre headings
  module HeadingControl
    # Source codes whose headings may be displayed. A code is expected in sf2 when the indicator2 value is 7
    # ("source specified"); headings from some sources are deliberately left out of this list because we don't
    # want to display them.
    ALLOWED_SOURCE_CODES = %w[
      aat cct fast ftamc gmgpc gsafd
      homoit jlabsh lcgft lcsh lcstt lctgm
      local/osu mesh ndlsh nlksh
      rbbin rbgenr rbmscv rbpap rbpri rbprov rbpub rbtyp
    ].freeze
  end
end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
module PennMARC
  # Helper for citation-related note fields (510 and 524)
  class Citation < Helper
    # "Cited in" values from field 510, which holds citations or references to published bibliographic
    # descriptions, reviews, abstracts, or indexes of the content of the described item (i.e., where an item has
    # been cited or reviewed). Citations may be given in a brief form using generally recognizable abbreviations.
    # The actual text of a published description is recorded in field 520 (Summary, Etc. Note), not in 510.
    # https://www.loc.gov/marc/bibliographic/bd510.html
    # @param [MARC::Record] record
    # @return [Array] array of citations and any linked alternates
    def self.cited_in_show(record)
      datafield_and_linked_alternate(record, '510')
    end

    # "Cite as" values from field 524, the Preferred Citation of Described Materials Note: the citation format
    # preferred by the custodian of the described materials. When multiple citation formats exist for the same
    # item, each is recorded in a separate occurrence of field 524. The note is sometimes displayed and/or printed
    # with an introductory phrase generated as a display constant based on the first indicator value.
    # https://www.loc.gov/marc/bibliographic/bd524.html
    # @param [MARC::Record] record
    # @return [Array] array of citation of described materials note and any linked alternates
    def self.cite_as_show(record)
      datafield_and_linked_alternate(record, '524')
    end
  end
end
@@ -0,0 +1,237 @@
1
+ # frozen_string_literal: true
2
+
3
module PennMARC
  # Do Creator & Author field processing. Main methods pull from 100 and 110 fields (see TAGS). Display methods here
  # no longer return data structures intended for generating "search" links, but some of the split subfield parsing
  # remains from ported methods in case we need to replicate that functionality.
  # @todo can there ever be multiple 100 fields?
  #       can ǂe and ǂ4 both be used at the same time? seems to result in duplicate values
  class Creator < Helper
    class << self
      # Main tags for Author/Creator information
      TAGS = %w[100 110].freeze
      # Aux tags for Author/Creator information, for use in search_aux method
      AUX_TAGS = %w[100 110 111 400 410 411 700 710 711 800 810 811].freeze

      # Author/Creator search field. Includes all subfield values (even ǂ0 URIs) from
      # {https://www.oclc.org/bibformats/en/1xx/100.html 100 Main Entry--Personal Name} and
      # {https://www.oclc.org/bibformats/en/1xx/110.html 110 Main Entry--Corporate Name}. Maps any relator codes found
      # in ǂ4. To better handle name searches, returns names as both "First Last" and "Last, First" if a comma is found
      # in ǂa. Also indexes any linked values in the 880. Some of the search fields remain incomplete and may need to be
      # further investigated and ported when search result relevancy is considered.
      # @todo this seems bad - why include relator labels? URIs? punctuation? leaving mostly as-is for now,
      #       but this should be reexamined in the relevancy-tuning phase. URIs should def be removed. and shouldn't
      #       indicator1 tell us the order of the name?
      # @note ported from get_author_creator_1_search_values
      # @param [MARC::Record] record
      # @param [Hash] relator_mapping
      # @return [Array<String>] array of unique author/creator values for indexing
      def search(record, relator_mapping)
        main_entries = record.fields(TAGS)
        # first pass: ǂa converted to "First Last" order
        acc = main_entries.map { |field| search_value(field, relator_mapping, reorder_name: true) }
        # second pass over the same fields: names left in the order recorded
        acc += main_entries.map { |field| search_value(field, relator_mapping, reorder_name: false) }
        acc += record.fields(%w[880]).filter_map do |field|
          # ǂ6 holds "<linking tag>-<occurrence number>[/...]" (e.g. "100-01"), so the linked tag has to be matched
          # as a prefix; an exact comparison against the bare tag never matches a real linkage value.
          next unless field.any? { |sf| sf.code == '6' && sf.value.to_s.start_with?(*TAGS) }

          suba = field.find_all(&subfield_in?(%w[a])).map { |sf| convert_name_order(sf.value) }.first
          oth = join_and_squish(field.find_all(&subfield_not_in?(%w[6 8 a t])).map(&:value))
          join_and_squish [suba, oth]
        end
        acc.uniq
      end

      # Auxiliary Author/Creator search field
      # @note ported from get_author_creator_2_search_values
      # @todo port this later
      # @param [MARC::Record] record
      # @return [Array<String>] array of extended author/creator values for indexing
      def search_aux(record); end

      # All author/creator values for display (like #show, but multivalued?) - no 880 linkage
      # @note ported from get_author_creator_values (indexed as author_creator_a) - shown on results page
      # @param [MARC::Record] record
      # @param [Hash] relator_mapping
      # @return [Array<String>] array of author/creator values for display
      def values(record, relator_mapping)
        record.fields(TAGS).map do |field|
          name_from_main_entry(field, relator_mapping)
        end
      end

      # Author/Creator values for display
      # @todo ported from get_author_display - used on record show page. porting did not include 4, e or w values,
      #       which were part of the link object as 'append' values in franklin
      # NOTE(review): linked 880 fields are selected via subfield_value_in?; if that helper compares ǂ6 exactly
      # against the bare tags, real linkage values such as "100-01" would not match - confirm in the helper.
      # @param [MARC::Record] record
      # @return [Array<String>] array of author/creator values for display
      def show(record)
        fields = record.fields(TAGS)
        fields += record.fields('880').select { |field| subfield_value_in?(field, '6', TAGS) }
        fields.filter_map do |field|
          join_subfields(field, &subfield_not_in?(%w[0 1 4 6 8 e w]))
        end
      end

      # Author/Creator sort. Does not map and include any relator
      # codes.
      # @todo This includes any URI from ǂ0 which could help to disambiguate in sorts, but ǂ1 is excluded...
      # @note ported from get_author_creator_sort_values
      # NOTE(review): when the record has no 100/110 field, nil is handed to join_subfields - confirm that helper
      # tolerates a nil field.
      # @param [MARC::Record] record
      # @return [String] string with author/creator value for sorting
      def sort(record)
        field = record.fields(TAGS).first
        join_subfields(field, &subfield_not_in?(%w[1 4 6 8 e]))
      end

      # Author/Creator for faceting. Grabs values from a plethora of fields, joins defined subfields, then trims some
      # punctuation (@see trim_punctuation)
      # @todo should trim_punctuation apply to each subfield value, or the joined values? i think the joined values
      # @note ported from author_creator_xfacet2_input - is this the best choice? check the copyField declarations -
      #       franklin uses author_creator_f
      # @param [MARC::Record] record
      # @return [Array<String>] array of author/creator values for faceting
      def facet(record)
        # tag => subfield codes (one character each) to include for that tag
        source_map = {
          100 => 'abcdjq', 110 => 'abcdjq', 111 => 'abcen',
          700 => 'abcdjq', 710 => 'abcdjq', 711 => 'abcen',
          800 => 'abcdjq', 810 => 'abcdjq', 811 => 'abcen'
        }
        source_map.flat_map do |field_num, subfields|
          record.fields(field_num.to_s).map do |field|
            trim_punctuation(join_subfields(field, &subfield_in?(subfields.chars)))
          end
        end
      end

      # Conference for display, intended for results display
      # @note ported from get_conference_values
      # @param [MARC::Record] record
      # @param [Hash] relator_map
      # @return [Array<String>] array of conference values
      def conference_show(record, relator_map)
        record.fields('111').filter_map do |field|
          name_from_main_entry field, relator_map
        end
      end

      # Conference detailed display, intended for record show page.
      # @note ported from get_conference_values
      # @todo what is ǂi for?
      # @param [MARC::Record] record
      # @return [Array<String>] array of conference values
      def conference_detail_show(record)
        values = record.fields(%w[111 711]).filter_map do |field|
          next unless field.indicator2.in? ['', ' ']

          # the main part of the value is dropped when the field has a ǂi
          conf = if subfield_undefined? field, 'i'
                   join_subfields field, &subfield_not_in?(%w[0 4 5 6 8 e j w])
                 else
                   ''
                 end
          conf_extra = join_subfields field, &subfield_in?(%w[e j w])
          join_and_squish [conf, conf_extra].compact_blank
        end
        values + record.fields('880').filter_map do |field|
          next unless subfield_value_in? field, '6', %w[111 711]

          next if subfield_defined? field, 'i'

          conf = join_subfields(field, &subfield_not_in?(%w[0 4 5 6 8 e j w]))
          conf_extra = join_subfields(field, &subfield_in?(%w[4 e j w]))
          join_and_squish [conf, conf_extra]
        end
      end

      # @todo this supports "Conference" fielded search and may not be needed
      # @note see get_conference_search_values
      def conference_search(record); end

      private

      # Build a single search value from a main entry field: join the wanted subfield values, map ǂ4 relator codes,
      # and make sure the result ends with a period unless it already ends with '.' or '-'.
      # ǂ6 and ǂ8 are always skipped. ǂ1 is skipped only when reorder_name is true.
      # NOTE(review): the ǂ1 difference between the two modes is carried over unchanged from the original two
      # passes - confirm whether it is intentional.
      # @param [MARC::DataField] field
      # @param [Hash] relator_mapping
      # @param [Boolean] reorder_name whether ǂa is converted from "Last, First" to "First Last"
      # @return [String]
      def search_value(field, relator_mapping, reorder_name:)
        skipped_codes = reorder_name ? %w[1 6 8] : %w[6 8]
        pieces = field.filter_map do |sf|
          next if skipped_codes.include?(sf.code)

          case sf.code
          when '4'
            relator = translate_relator(sf.value, relator_mapping)
            relator unless relator.blank?
          when 'a'
            reorder_name ? convert_name_order(sf.value) : sf.value
          else
            sf.value
          end
        end
        value = join_and_squish(pieces)
        value.end_with?('.', '-') ? value : "#{value}."
      end

      # Trim punctuation method extracted from Traject macro, to ensure consistent output
      # @todo move to Util?
      # @param [String] string
      # @return [String] string with relevant punctuation removed
      def trim_punctuation(string)
        return string unless string

        # trailing space, comma, slash, semicolon or colon (with any surrounding spaces)
        string = string.sub(%r{ *[ ,/;:] *\Z}, '')

        # trailing period if it is preceded by at least three letters (possibly preceded and followed by whitespace)
        string = string.sub(/( *[[:word:]]{3,})\. *\Z/, '\1')

        # single square bracket characters if they are the start and/or end chars and there are no internal square
        # brackets.
        string = string.sub(/\A\[?([^\[\]]+)\]?\Z/, '\1')

        # trim any leading or trailing whitespace
        string.strip
      end

      # Extract the information we care about from 1xx fields, map relator codes, and use appropriate punctuation
      # @param [MARC::Field] field
      # @param [Hash] mapping relator code mapping handed to translate_relator
      # @return [String] joined subfield values for value from field
      def name_from_main_entry(field, mapping)
        s = field.filter_map do |sf|
          if %w[0 1 4 6 8].exclude?(sf.code)
            " #{sf.value}"
          elsif sf.code == '4'
            relator = translate_relator(sf.value, mapping)
            next if relator.blank?

            ", #{relator}"
          end
        end.join
        # close the value with a period unless it already ends with '.' or '-'
        s += '.' unless s.end_with?('.', '-')
        s.squish
      end

      # Convert "Lastname, First" to "First Lastname"
      # @param [String] name value for processing
      # @return [String]
      def convert_name_order(name)
        return name unless name.include? ','

        after_comma = join_and_squish([trim_trailing(:comma, substring_after(name, ', '))])
        before_comma = substring_before(name, ', ')
        "#{after_comma} #{before_comma}".squish
      end
    end
  end
end
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
module PennMARC
  # Parses Database Subject Category and Database Type local fields
  class Database < Helper
    # Database format type used to facet databases, found in
    # {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
    # local field 944} subfield 'a'.
    DATABASES_FACET_VALUE = 'Database & Article Index'
    # Penn Libraries' Community of Interest code used in
    # {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
    # local field 943} subfield '2'.
    COI_CODE = 'penncoi'

    class << self
      # Database subtypes: the subfield 'b' value of each
      # {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
      # local field 944} whose subfield 'a' matches Penn's Database facet value.
      # @param [MARC::Record] record
      # @return [Array<String>] Array of types
      def type(record)
        record.fields('944').filter_map do |field|
          # only fields carrying the database format type qualify
          next unless subfield_value?(field, 'a', /#{DATABASES_FACET_VALUE}/)

          field.find { |sf| sf.code == 'b' }&.value
        end
      end

      # Database subject categories/communities of interest: the subfield 'a' value of each
      # {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
      # local field 943} whose subfield '2' matches Penn's Community of Interest code. Always empty unless the
      # record is a curated database (see curated_db?).
      # @param [MARC::Record] record
      # @return [Array<String>] Array of categories
      def db_category(record)
        return [] unless curated_db?(record)

        record.fields('943').filter_map do |field|
          # only fields carrying the Community of Interest code in subfield '2' qualify
          next unless subfield_value?(field, '2', /#{COI_CODE}/)

          field.find { |sf| sf.code == 'a' }&.value
        end
      end

      # Database subject category joined with its sub subject category as "category--subcategory". The two values
      # come from subfield 'a' and subfield 'b' respectively of each
      # {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
      # local field 943} whose subfield '2' matches Penn's Community of Interest code. Fields missing either
      # subfield are skipped. Always empty unless the record is a curated database (see curated_db?).
      # @note return value differs from legacy implementation. This version only returns ["category--subcategory"] or
      #       an empty array.
      # @param [MARC::Record] record
      # @return [Array<String>] Array of "category--subcategory"
      def db_subcategory(record)
        return [] unless curated_db?(record)

        record.fields('943').filter_map do |field|
          # only fields carrying the Community of Interest code in subfield '2' qualify
          next unless subfield_value?(field, '2', /#{COI_CODE}/)

          category = field.find { |sf| sf.code == 'a' }
          subcategory = field.find { |sf| sf.code == 'b' }

          # both parts are required to build the combined value
          next if category.blank? || subcategory.blank?

          "#{category.value}--#{subcategory.value}"
        end
      end

      private

      # Whether any 944 field has the database format type (the value used to facet databases) in subfield 'a'
      # @param [MARC::Record] record
      # @return [TrueClass, FalseClass]
      def curated_db?(record)
        record.fields('944').any? do |field|
          subfield_value?(field, 'a', /#{DATABASES_FACET_VALUE}/)
        end
      end
    end
  end
end
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
module PennMARC
  # Parser methods for extracting date info as DateTime objects
  class Date < Helper
    class << self
      # Retrieve publication date (Date 1) from {https://www.loc.gov/marc/bibliographic/bd008a.html 008 field}.
      # Publication date is a four-digit year found in position 7-10 and may contain 'u' characters to represent
      # partially known dates. We replace any occurrences of 'u' with '0' before converting to DateTime object.
      # @param [MARC::Record] record
      # @return [DateTime, nil] The publication date, or nil if date found in record is invalid
      def publication(record)
        record.fields('008').filter_map do |field|
          four_digit_year = sanitize_partially_known_date(field.value[7, 4], '0')

          next unless four_digit_year.present?

          DateTime.new(four_digit_year.to_i)
        end.first
      end

      # Retrieve date added (subfield 'q') from enriched marc 'itm' field.
      # {PennMARC::EnrichedMarc} maps enriched marc fields and subfields created during Alma publishing.
      # When several date created values are present, the latest one is returned.
      # @param [MARC::Record] record
      # @return [DateTime, nil] The date added, or nil if date found in record is invalid
      def added(record)
        record.fields(EnrichedMarc::TAG_ITEM).flat_map do |field|
          field.filter_map do |subfield|
            # only the date created subfield holds a date; parsing the field's other subfields would log a
            # spurious error for each of them and could pick up an unrelated date-like value
            next unless subfield.code == EnrichedMarc::SUB_ITEM_DATE_CREATED

            # On 2022-05-02, this field value (as exported in enriched publishing
            # job from Alma) began truncating time to day-level granularity. We have
            # no guarantee that this won't switch back in the future, so for the
            # foreseeable future we should support both formats.

            format = subfield.value.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'

            DateTime.strptime(subfield.value, format)
          rescue StandardError => e
            # an unparseable value is reported and skipped rather than aborting the whole record
            puts "Error parsing date in date added subfield: #{subfield.value} - #{e}"
            nil
          end
        end.max
      end

      # Retrieve date last updated from {https://www.loc.gov/marc/bibliographic/bd005.html 005 field}.
      # Date last updated is a sixteen character String recorded in
      # {https://www.iso.org/iso-8601-date-and-time-format.html ISO 8601} format.
      # @param [MARC::Record] record
      # @return [DateTime, nil] The date last updated, or nil if date found in record is invalid
      def last_updated(record)
        record.fields('005').filter_map do |field|
          date_time_string = field.value

          next if date_time_string.blank?

          # values starting with '0000' are skipped without attempting to parse them
          next if date_time_string.start_with?('0000')

          DateTime.iso8601(date_time_string)
        rescue ArgumentError => e
          puts "Error parsing last updated date: #{date_time_string} - #{e}"
          nil
        end.first
      end

      private

      # Sanitizes a partially known date string by replacing any 'u' occurrences with a specified replacement value.
      # @param [String] date The date string in '%Y' format, potentially containing 'u' characters.
      # @param [String] replacement The value with which to replace 'u' occurrences in the date string.
      # @return [String, nil] The sanitized date string with 'u' characters replaced by the replacement value,
      #   or nil if the date string does not match the expected format.
      def sanitize_partially_known_date(date, replacement)
        # early return unless the WHOLE string is zero or more digits followed by zero or more occurrences of 'u'
        # (\A and \z anchor the full string; ^ and $ would accept a match on any single line of it)
        return unless /\A[0-9]*u*\z/.match?(date)

        # replace 'u' occurrences with the specified replacement value
        date.gsub('u', replacement)
      end
    end
  end
end