pennmarc 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +23 -0
- data/Gemfile.lock +119 -0
- data/README.md +82 -0
- data/legacy/indexer.rb +568 -0
- data/legacy/marc.rb +2964 -0
- data/legacy/test_file_output.json +49 -0
- data/lib/pennmarc/encoding_level.rb +43 -0
- data/lib/pennmarc/enriched_marc.rb +36 -0
- data/lib/pennmarc/heading_control.rb +11 -0
- data/lib/pennmarc/helpers/citation.rb +31 -0
- data/lib/pennmarc/helpers/creator.rb +237 -0
- data/lib/pennmarc/helpers/database.rb +89 -0
- data/lib/pennmarc/helpers/date.rb +85 -0
- data/lib/pennmarc/helpers/edition.rb +90 -0
- data/lib/pennmarc/helpers/format.rb +312 -0
- data/lib/pennmarc/helpers/genre.rb +71 -0
- data/lib/pennmarc/helpers/helper.rb +11 -0
- data/lib/pennmarc/helpers/identifier.rb +134 -0
- data/lib/pennmarc/helpers/language.rb +37 -0
- data/lib/pennmarc/helpers/link.rb +12 -0
- data/lib/pennmarc/helpers/location.rb +97 -0
- data/lib/pennmarc/helpers/note.rb +132 -0
- data/lib/pennmarc/helpers/production.rb +131 -0
- data/lib/pennmarc/helpers/relation.rb +135 -0
- data/lib/pennmarc/helpers/series.rb +118 -0
- data/lib/pennmarc/helpers/subject.rb +304 -0
- data/lib/pennmarc/helpers/title.rb +197 -0
- data/lib/pennmarc/mappings/language.yml +516 -0
- data/lib/pennmarc/mappings/locations.yml +1801 -0
- data/lib/pennmarc/mappings/relator.yml +263 -0
- data/lib/pennmarc/parser.rb +177 -0
- data/lib/pennmarc/util.rb +240 -0
- data/lib/pennmarc.rb +6 -0
- data/pennmarc.gemspec +22 -0
- data/spec/fixtures/marcxml/test.xml +167 -0
- data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
- data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
- data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
- data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
- data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
- data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
- data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
- data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
- data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
- data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
- data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
- data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
- data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
- data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
- data/spec/lib/pennmarc/parser_spec.rb +13 -0
- data/spec/spec_helper.rb +104 -0
- data/spec/support/marc_spec_helpers.rb +84 -0
- metadata +171 -0
@@ -0,0 +1,49 @@
|
|
1
|
+
{ "id":["FRANKLIN_9910148543503681"],
|
2
|
+
"grouped_id":["18521155!FRANKLIN_9910148543503681"],
|
3
|
+
"record_source_id":[1],
|
4
|
+
"record_source_f":["Penn"],
|
5
|
+
"nocirc_f_stored":["none"],
|
6
|
+
"alma_mms_id":["9910148543503681"],
|
7
|
+
"oclc_id":["18521155"],
|
8
|
+
"cluster_id":["18521155"],
|
9
|
+
"marcrecord_xml_stored_single_large":["<record><leader>01908cam a2200469 a 4500</leader><controlfield tag=\"005\">20220609191957.0</controlfield><controlfield tag=\"008\">890224t19891989nyua 001 0 eng </controlfield><controlfield tag=\"001\">9910148543503681</controlfield><datafield tag=\"010\" ind1=\" \" ind2=\" \"><subfield code=\"a\"> 88029825 </subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(OCoLC)ocm18521155</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(OCoLC)18521155</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(CStRLIN)PAUG89-B7246</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(CaOTULAS)185188489</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"9\">AHA6856</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">1014854</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(PU)1014854-penndb-Voyager</subfield></datafield><datafield tag=\"040\" ind1=\" \" ind2=\" \"><subfield code=\"b\">eng</subfield><subfield code=\"d\">CSt-B</subfield></datafield><datafield tag=\"043\" ind1=\" \" ind2=\" \"><subfield code=\"a\">n-us---</subfield></datafield><datafield tag=\"050\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">KF6450</subfield><subfield code=\"b\">.C59 1989</subfield></datafield><datafield tag=\"082\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">343.7306/8</subfield><subfield code=\"a\">347.30368</subfield><subfield code=\"2\">19</subfield></datafield><datafield tag=\"090\" ind1=\" \" ind2=\" \"><subfield code=\"a\">KF6450</subfield><subfield code=\"b\">.C59 1989</subfield><subfield code=\"i\">11/30/89 CTZ</subfield></datafield><datafield tag=\"245\" ind1=\"0\" ind2=\"4\"><subfield code=\"a\">The Coopers & Lybrand guide to business tax strategies and planning /</subfield><subfield code=\"c\">by the 
partners of Coopers & Lybrand.</subfield></datafield><datafield tag=\"264\" ind1=\" \" ind2=\"1\"><subfield code=\"a\">New York :</subfield><subfield code=\"b\">Simon and Schuster,</subfield><subfield code=\"c\">[1989]</subfield></datafield><datafield tag=\"264\" ind1=\" \" ind2=\"4\"><subfield code=\"c\">©1989</subfield></datafield><datafield tag=\"300\" ind1=\" \" ind2=\" \"><subfield code=\"a\">x, 198 pages :</subfield><subfield code=\"b\">illustrations ;</subfield><subfield code=\"c\">24 cm</subfield></datafield><datafield tag=\"336\" ind1=\" \" ind2=\" \"><subfield code=\"a\">text</subfield><subfield code=\"b\">txt</subfield><subfield code=\"2\">rdacontent</subfield></datafield><datafield tag=\"337\" ind1=\" \" ind2=\" \"><subfield code=\"a\">unmediated</subfield><subfield code=\"b\">n</subfield><subfield code=\"2\">rdamedia</subfield></datafield><datafield tag=\"338\" ind1=\" \" ind2=\" \"><subfield code=\"a\">volume</subfield><subfield code=\"b\">nc</subfield><subfield code=\"2\">rdacarrier</subfield></datafield><datafield tag=\"500\" ind1=\" \" ind2=\" \"><subfield code=\"a\">Edited by Jonathan J. 
Davies and others.</subfield></datafield><datafield tag=\"650\" ind1=\" \" ind2=\"0\"><subfield code=\"a\">Business enterprises</subfield><subfield code=\"x\">Taxation</subfield><subfield code=\"z\">United States.</subfield><subfield code=\"0\">http://id.loc.gov/authorities/subjects/sh2009118044</subfield></datafield><datafield tag=\"650\" ind1=\" \" ind2=\"7\"><subfield code=\"a\">Business enterprises</subfield><subfield code=\"x\">Taxation.</subfield><subfield code=\"2\">fast</subfield><subfield code=\"0\">http://id.worldcat.org/fast/842634</subfield></datafield><datafield tag=\"651\" ind1=\" \" ind2=\"7\"><subfield code=\"a\">United States.</subfield><subfield code=\"2\">fast</subfield><subfield code=\"0\">http://id.worldcat.org/fast/1204155</subfield></datafield><datafield tag=\"650\" ind1=\" \" ind2=\"0\"><subfield code=\"a\">Tax planning</subfield><subfield code=\"z\">United States.</subfield><subfield code=\"0\">http://id.loc.gov/authorities/subjects/sh2008112546</subfield></datafield><datafield tag=\"650\" ind1=\" \" ind2=\"7\"><subfield code=\"a\">Tax planning.</subfield><subfield code=\"2\">fast</subfield><subfield code=\"0\">http://id.worldcat.org/fast/1143815</subfield></datafield><datafield tag=\"700\" ind1=\"1\" ind2=\" \"><subfield code=\"a\">Davies, Jonathan J.</subfield><subfield code=\"0\">http://id.loc.gov/authorities/names/n82126416</subfield></datafield><datafield tag=\"710\" ind1=\"2\" ind2=\" \"><subfield code=\"a\">Coopers & Lybrand.</subfield><subfield code=\"0\">http://id.loc.gov/authorities/names/n79063025</subfield></datafield><datafield tag=\"740\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">Guide to business tax strategies and planning.</subfield></datafield><datafield tag=\"740\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">Business tax strategies and planning</subfield></datafield><datafield tag=\"740\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">Tax strategies and planning.</subfield></datafield><datafield tag=\"902\" ind1=\" \" ind2=\" 
\"><subfield code=\"a\">MARCIVE 2022</subfield></datafield><datafield tag=\"950\" ind1=\" \" ind2=\" \"><subfield code=\"l\">LIPP</subfield><subfield code=\"i\">11/30/89 C</subfield></datafield><datafield tag=\"955\" ind1=\" \" ind2=\" \"><subfield code=\"l\">LIPP</subfield><subfield code=\"c\">1</subfield><subfield code=\"q\">89-B7246-1</subfield><subfield code=\"r\">[01043 6272]</subfield><subfield code=\"i\">11/30/89 C</subfield></datafield></record>"],
|
10
|
+
"access_f_stored":["At the library"],
|
11
|
+
"format_f_stored":["Book"],
|
12
|
+
"author_creator_xfacet2_input":["nDavies, Jonathan J.","nCoopers & Lybrand"],
|
13
|
+
"subject_search":["Business enterprises Taxation United States. http://id.loc.gov/authorities/subjects/sh2009118044","Business enterprises Taxation. fast http://id.worldcat.org/fast/842634","United States. fast http://id.worldcat.org/fast/1204155","Tax planning United States. http://id.loc.gov/authorities/subjects/sh2008112546","Tax planning. fast http://id.worldcat.org/fast/1143815"],
|
14
|
+
"toplevel_subject_f":["Business enterprises","Business enterprises","United States","Tax planning","Tax planning"],
|
15
|
+
"call_number_xfacet":["{\"raw\":\"KF6450 .C59 1989\"}"],
|
16
|
+
"language_f_stored":["English"],
|
17
|
+
"language_search":["English"],
|
18
|
+
"library_f_stored":["LIBRA"],
|
19
|
+
"specific_location_f_stored":["LIBRA"],
|
20
|
+
"classification_f_stored":["K - Law"],
|
21
|
+
"title_1_search":["The Coopers & Lybrand guide to business tax strategies and planning /"],
|
22
|
+
"title_2_search":["The Coopers & Lybrand guide to business tax strategies and planning /","Guide to business tax strategies and planning.","Business tax strategies and planning","Tax strategies and planning."],
|
23
|
+
"author_creator_2_search":["Davies, Jonathan J. http://id.loc.gov/authorities/names/n82126416","Jonathan J. Davies http://id.loc.gov/authorities/names/n82126416","Coopers & Lybrand. http://id.loc.gov/authorities/names/n79063025","http://id.loc.gov/authorities/names/n79063025"],
|
24
|
+
"title":["The Coopers & Lybrand guide to business tax strategies and planning"],
|
25
|
+
"title_xfacet":["{\"raw\":{\"prefix\":\"The \",\"filing\":\"Coopers & Lybrand guide to business tax strategies and planning / \"}}"],
|
26
|
+
"title_nssort":["Coopers & Lybrand guide to business tax strategies and planning / The "],
|
27
|
+
"title_sort_tl":["Coopers & Lybrand guide to business tax strategies and planning / "],
|
28
|
+
"publication_a":["New York : Simon and Schuster, [1989] , ©1989"],
|
29
|
+
"elvl_rank_isort":[0],
|
30
|
+
"hld_count_isort":[1],
|
31
|
+
"itm_count_isort":[1],
|
32
|
+
"empty_hld_count_isort":[0],
|
33
|
+
"subject_xfacet2_input":["sBusiness enterprises--Taxation--United States","{\"val\":\"Business enterprises--Taxation\",\"prefix\":\"f\"}","{\"val\":\"United States\",\"prefix\":\"f\"}","sTax planning--United States","{\"val\":\"Tax planning\",\"prefix\":\"f\"}"],
|
34
|
+
"recently_added_isort":[1496742800],
|
35
|
+
"last_update_isort":[1654802397],
|
36
|
+
"publication_date_ssort":["1989"],
|
37
|
+
"pub_min_dtsort":["1989-01-01T00:00:00Z"],
|
38
|
+
"pub_max_dtsort":["1990-01-01T00:00:00Z"],
|
39
|
+
"content_min_dtsort":["1989-01-01T00:00:00Z"],
|
40
|
+
"content_max_dtsort":["1990-01-01T00:00:00Z"],
|
41
|
+
"publication_date_f_stored":["1980s"],
|
42
|
+
"publication_dr":["[1989 TO 1989]"],
|
43
|
+
"content_dr":["[1989 TO 1989]"],
|
44
|
+
"call_number_search":["KF6450 .C59 1989"],
|
45
|
+
"physical_holdings_json":["[{\"holding_id\":\"22418068440003681\",\"location\":\"stor\",\"classification_part\":\"KF6450\",\"item_part\":\".C59 1989\"}]"],
|
46
|
+
"corporate_author_search":["Coopers & Lybrand."],
|
47
|
+
"place_of_publication_search":["New York :"],
|
48
|
+
"publisher_search":["Simon and Schuster,"]
|
49
|
+
}
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# MARC encoding level
|
4
|
+
# See: https://www.oclc.org/bibformats/en/fixedfield/elvl.html
|
5
|
+
# Not sure how this is used
|
6
|
+
module PennMARC
  # Encoding level codes plus a numeric ranking of those codes.
  # Code definitions come from the MARC leader documentation
  # (https://www.loc.gov/marc/bibliographic/bdleader.html) and from OCLC's ELvl documentation
  # (https://www.oclc.org/bibformats/en/fixedfield/elvl.html).
  module EncodingLevel
    # Codes defined by the official MARC specification
    FULL = ' '
    FULL_NOT_EXAMINED = '1'
    UNFULL_NOT_EXAMINED = '2'
    ABBREVIATED = '3'
    CORE = '4'
    PRELIMINARY = '5'
    MINIMAL = '7'
    PREPUBLICATION = '8'
    UNKNOWN = 'u'
    NOT_APPLICABLE = 'z'

    # Codes added by OCLC
    OCLC_FULL = 'I'
    OCLC_MINIMAL = 'K'
    OCLC_BATCH_LEGACY = 'L'
    OCLC_BATCH = 'M'
    OCLC_SOURCE_DELETED = 'J'

    # Rank for an encoding level code. The first four codes (per nelsonrr) are all considered "good" records and are
    # deliberately not differentiated: they share rank 0. The remaining ranked codes receive distinct, increasing
    # values. PREPUBLICATION, UNKNOWN and NOT_APPLICABLE have no entry, so looking them up returns nil.
    RANK = [FULL, FULL_NOT_EXAMINED, OCLC_FULL, CORE].to_h { |code| [code, 0] }.merge(
      {
        UNFULL_NOT_EXAMINED => 4,
        ABBREVIATED => 5,
        PRELIMINARY => 6,
        MINIMAL => 7,
        OCLC_MINIMAL => 8,
        OCLC_BATCH => 9,
        OCLC_BATCH_LEGACY => 10,
        OCLC_SOURCE_DELETED => 11
      }
    ).freeze
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Constants for Alma's MARC enrichment
|
4
|
+
# MARC enrichment is performed during the Alma Publishing process
|
5
|
+
# @see https://developers.exlibrisgroup.com/alma/apis/docs/bibs/R0VUIC9hbG1hd3MvdjEvYmlicy97bW1zX2lkfQ==/
|
6
|
+
# Alma documentation for these added fields
|
7
|
+
module PennMARC
  # Tag and subfield-code constants for the fields Alma adds to a record during its Publishing process.
  module EnrichedMarc
    # Enrichment field tags; the names follow the wording on Alma's Publishing Profile screen.
    TAG_HOLDING              = 'hld'
    TAG_ITEM                 = 'itm'
    TAG_ELECTRONIC_INVENTORY = 'prt'
    TAG_DIGITAL_INVENTORY    = 'dig'

    # Holding subfields. These are 852 subfield codes, named as in the MARC spec.
    SUB_HOLDING_SHELVING_LOCATION   = 'c'
    SUB_HOLDING_SEQUENCE_NUMBER     = '8'
    SUB_HOLDING_CLASSIFICATION_PART = 'h'
    SUB_HOLDING_ITEM_PART           = 'i'

    # Item subfields.
    SUB_ITEM_CURRENT_LOCATION = 'g'
    SUB_ITEM_CALL_NUMBER_TYPE = 'h'
    SUB_ITEM_CALL_NUMBER      = 'i'
    SUB_ITEM_DATE_CREATED     = 'q'

    # Electronic inventory (portfolio) subfields.
    SUB_ELEC_PORTFOLIO_PID   = 'a'
    SUB_ELEC_ACCESS_URL      = 'b'
    SUB_ELEC_COLLECTION_NAME = 'c'
    SUB_ELEC_COVERAGE        = 'g'

    # Subfield holding boundwith record IDs. This code is NOT used by the MARC 21 spec for 852 holdings records;
    # it is added during preprocessing.
    # TODO: evaluate this in context of changed boundwiths processing
    SUB_BOUND_WITH_ID = 'y'
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
  # Values shared by the subject and genre helpers to decide which headings are included.
  module HeadingControl
    # Source codes whose headings are allowed. A code from this list is expected in subfield 2 when indicator 2 is 7
    # ("source specified"); headings from some other sources are not wanted for display.
    ALLOWED_SOURCE_CODES = %w[
      aat cct fast ftamc gmgpc gsafd homoit jlabsh lcgft lcsh lcstt lctgm
      local/osu mesh ndlsh nlksh rbbin rbgenr rbmscv rbpap rbpri rbprov rbpub rbtyp
    ].freeze
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
  # Extracts citation notes (fields 510 and 524) for display.
  class Citation < Helper
    # "Cited in" values from field 510, which holds citations or references to published bibliographic descriptions,
    # reviews, abstracts, or indexes of the content of the described item. The text of a published description
    # itself is recorded in field 520 (Summary, Etc. Note), not here.
    # https://www.loc.gov/marc/bibliographic/bd510.html
    # @param [MARC::Record] record
    # @return [Array] array of citations and any linked alternates
    def self.cited_in_show(record)
      datafield_and_linked_alternate(record, '510')
    end

    # "Cite as" values from field 524, the Preferred Citation of Described Materials Note: the citation format
    # preferred by the custodian of the materials. Each citation format is recorded in its own occurrence of 524.
    # https://www.loc.gov/marc/bibliographic/bd524.html
    # @param [MARC::Record] record
    # @return [Array] array of citation of described materials note and any linked alternates
    def self.cite_as_show(record)
      datafield_and_linked_alternate(record, '524')
    end
  end
end
|
@@ -0,0 +1,237 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
  # Do Creator & Author field processing. The main-entry methods pull from the tags in TAGS (100 and 110); the
  # conference methods pull from 111 and 711, and #facet reads a wider set of 1xx/7xx/8xx tags. Display methods here
  # no longer return data structures intended for generating "search" links, but some of the split subfield parsing
  # remains from ported methods in case we need to replicate that functionality.
  # @todo can there ever be multiple 100 fields?
  #   can ǂe and ǂ4 both be used at the same time? seems to result in duplicate values
  class Creator < Helper
    class << self
      # Main tags for Author/Creator information
      TAGS = %w[100 110].freeze
      # Aux tags for Author/Creator information, for use in search_aux method
      AUX_TAGS = %w[100 110 111 400 410 411 700 710 711 800 810 811].freeze

      # Author/Creator search field. Includes all subfield values (even ǂ0 URIs) from
      # {https://www.oclc.org/bibformats/en/1xx/100.html 100 Main Entry--Personal Name} and
      # {https://www.oclc.org/bibformats/en/1xx/110.html 110 Main Entry--Corporate Name}. Maps any relator codes found
      # in ǂ4. To better handle name searches, each main entry is indexed twice: once with the ǂa name reordered from
      # "Last, First" to "First Last", and once with the name as found. Also indexes any linked values in the 880.
      # Duplicate values are removed from the result.
      # @todo this seems bad - why include relator labels? URIs? punctuation? leaving mostly as-is for now,
      #   but this should be reexamined in the relevancy-tuning phase. URIs should def be removed. and shouldn't
      #   indicator1 tell us the order of the name?
      # @note ported from get_author_creator_1_search_values
      # @param [MARC::Record] record
      # @param [Hash] relator_mapping
      # @return [Array<String>] array of author/creator values for indexing
      def search(record, relator_mapping)
        main_entries = record.fields(TAGS)

        # first pass: ǂa reordered, ǂ1 left out
        acc = main_entries.map do |field|
          search_value(field, relator_mapping, %w[1 6 8], reorder_name: true)
        end
        # second pass over the same fields: names not reordered, ǂ1 kept
        acc += main_entries.map do |field|
          search_value(field, relator_mapping, %w[6 8], reorder_name: false)
        end
        # NOTE(review): the ǂ6 test below is an exact match against "100"/"110". MARC linkage values usually carry an
        # occurrence number after the tag (e.g. "100-01") - confirm the values are normalized before reaching here.
        acc += record.fields(%w[880]).filter_map do |field|
          next unless field.any? { |sf| sf.code == '6' && sf.value.in?(%w[100 110]) }

          suba = field.find_all(&subfield_in?(%w[a])).map { |sf| convert_name_order(sf.value) }.first
          oth = join_and_squish(field.find_all(&subfield_not_in?(%w[6 8 a t])).map(&:value))
          join_and_squish [suba, oth]
        end
        acc.uniq
      end

      # Auxiliary Author/Creator search field. Not yet ported: currently returns nil.
      # @note ported from get_author_creator_2_search_values
      # @todo port this later
      # @param [MARC::Record] record
      # @return [Array<String>] array of extended author/creator values for indexing
      def search_aux(record); end

      # All author/creator values for display (like #show, but multivalued?) - no 880 linkage
      # @note ported from get_author_creator_values (indexed as author_creator_a) - shown on results page
      # @param [MARC::Record] record
      # @param [Hash] relator_mapping
      # @return [Array<String>] array of author/creator values for display
      def values(record, relator_mapping)
        record.fields(TAGS).map { |field| name_from_main_entry(field, relator_mapping) }
      end

      # Author/Creator values for display. Reads the main entry fields plus any 880 whose ǂ6 matches one of TAGS, and
      # leaves out subfields 0, 1, 4, 6, 8, e and w.
      # @todo ported from get_author_display - used on record show page. porting did not include 4, e or w values,
      #   which were part of the link object as 'append' values in franklin
      # @param [MARC::Record] record
      # @return [Array<String>] array of author/creator values for display
      def show(record)
        fields = record.fields(TAGS)
        fields += record.fields('880').select { |field| subfield_value_in?(field, '6', TAGS) }
        fields.filter_map do |field|
          join_subfields(field, &subfield_not_in?(%w[0 1 4 6 8 e w]))
        end
      end

      # Author/Creator sort. Uses only the first main entry field and does not map or include any relator codes.
      # @todo This includes any URI from ǂ0 which could help to disambiguate in sorts, but ǂ1 is excluded...
      # @note ported from get_author_creator_sort_values
      # @param [MARC::Record] record
      # @return [String] string with author/creator value for sorting
      def sort(record)
        # NOTE(review): field is nil when the record has no 100/110 - confirm join_subfields tolerates a nil field
        field = record.fields(TAGS).first
        join_subfields(field, &subfield_not_in?(%w[1 4 6 8 e]))
      end

      # Author/Creator for faceting. Grabs values from a plethora of fields, joins defined subfields, then trims some
      # punctuation (@see trim_punctuation)
      # @todo should trim_punctuation apply to each subfield value, or the joined values? i think the joined values
      # @note ported from author_creator_xfacet2_input - is this the best choice? check the copyField declarations -
      #   franklin uses author_creator_f
      # @param [MARC::Record] record
      # @return [Array<String>] array of author/creator values for faceting
      def facet(record)
        # tag => subfield codes to include; results come out in this tag order
        source_map = {
          100 => 'abcdjq', 110 => 'abcdjq', 111 => 'abcen',
          700 => 'abcdjq', 710 => 'abcdjq', 711 => 'abcen',
          800 => 'abcdjq', 810 => 'abcdjq', 811 => 'abcen'
        }
        source_map.flat_map do |field_num, subfields|
          record.fields(field_num.to_s).map do |field|
            trim_punctuation(join_subfields(field, &subfield_in?(subfields.chars)))
          end
        end
      end

      # Conference for display, intended for results display
      # @note ported from get_conference_values
      # @param [MARC::Record] record
      # @param [Hash] relator_map
      # @return [Array<String>] array of conference values
      def conference_show(record, relator_map)
        record.fields('111').filter_map { |field| name_from_main_entry(field, relator_map) }
      end

      # Conference detailed display, intended for record show page. Only 111/711 fields with a blank second indicator
      # are used. For those fields the main part is dropped when ǂi is present (the e/j/w part is still emitted),
      # whereas a linked 880 with ǂi is skipped entirely.
      # NOTE(review): the 880 branch also appends ǂ4 to the trailing part while the 111/711 branch does not - confirm
      # whether that asymmetry is intended.
      # @note ported from get_conference_values
      # @todo what is ǂi for?
      # @param [MARC::Record] record
      # @return [Array<String>] array of conference values
      def conference_detail_show(record)
        values = record.fields(%w[111 711]).filter_map do |field|
          next unless field.indicator2.in? ['', ' ']

          conf = if subfield_undefined? field, 'i'
                   join_subfields field, &subfield_not_in?(%w[0 4 5 6 8 e j w])
                 else
                   ''
                 end
          conf_extra = join_subfields field, &subfield_in?(%w[e j w])
          join_and_squish [conf, conf_extra].compact_blank
        end
        values + record.fields('880').filter_map do |field|
          next unless subfield_value_in? field, '6', %w[111 711]

          next if subfield_defined? field, 'i'

          conf = join_subfields(field, &subfield_not_in?(%w[0 4 5 6 8 e j w]))
          conf_extra = join_subfields(field, &subfield_in?(%w[4 e j w]))
          join_and_squish [conf, conf_extra]
        end
      end

      # Not yet implemented: currently returns nil.
      # @todo this supports "Conference" fielded search and may not be needed
      # @note see get_conference_search_values
      def conference_search(record); end

      private

      # Build one search value from a main entry field: subfield values joined and squished, with ǂ4 relator codes
      # translated (and dropped when the translation is blank), ending in terminal punctuation.
      # @param [MARC::Field] field
      # @param [Hash] relator_mapping
      # @param [Array<String>] omitted_codes subfield codes to leave out entirely
      # @param [Boolean] reorder_name whether to run the ǂa value through convert_name_order
      # @return [String]
      def search_value(field, relator_mapping, omitted_codes, reorder_name:)
        pieces = field.filter_map do |sf|
          if sf.code == '4'
            relator = translate_relator(sf.value, relator_mapping)
            relator unless relator.blank?
          elsif omitted_codes.include?(sf.code)
            nil
          elsif sf.code == 'a' && reorder_name
            convert_name_order(sf.value)
          else
            sf.value
          end
        end
        with_terminal_punctuation(join_and_squish(pieces))
      end

      # Append a period unless the value already ends with a period or a hyphen
      # @param [String] value
      # @return [String]
      def with_terminal_punctuation(value)
        value.end_with?('.', '-') ? value : "#{value}."
      end

      # Trim punctuation method extracted from Traject macro, to ensure consistent output
      # @todo move to Util?
      # @param [String] string
      # @return [String] string with relevant punctuation removed
      def trim_punctuation(string)
        return string unless string

        # one trailing space, comma, slash, semicolon or colon, together with surrounding spaces
        string = string.sub(%r{ *[ ,/;:] *\Z}, '')

        # trailing period if it is preceded by at least three letters (possibly preceded and followed by whitespace)
        string = string.sub(/( *[[:word:]]{3,})\. *\Z/, '\1')

        # single square bracket characters if they are the start and/or end chars and there are no internal square
        # brackets.
        string = string.sub(/\A\[?([^\[\]]+)\]?\Z/, '\1')

        # trim any leading or trailing whitespace
        string.strip
      end

      # Extract the information we care about from 1xx fields, map relator codes, and use appropriate punctuation.
      # Subfields 0, 1, 6 and 8 are left out; a translated ǂ4 relator is appended after a comma.
      # @param [MARC::Field] field
      # @param [Hash] mapping relator code mapping
      # @return [String] joined subfield values for value from field
      def name_from_main_entry(field, mapping)
        joined = field.filter_map do |sf|
          if %w[0 1 4 6 8].exclude?(sf.code)
            " #{sf.value}"
          elsif sf.code == '4'
            relator = translate_relator(sf.value, mapping)
            ", #{relator}" unless relator.blank?
          end
        end.join
        # punctuation is decided on the raw joined string, before whitespace is squished
        with_terminal_punctuation(joined).squish
      end

      # Convert "Lastname, First" to "First Lastname"
      # NOTE(review): the guard looks for any comma but the split is on ", " (comma + space) - confirm how
      # substring_before/substring_after behave for a comma that has no following space.
      # @param [String] name value for processing
      # @return [String]
      def convert_name_order(name)
        return name unless name.include? ','

        after_comma = join_and_squish([trim_trailing(:comma, substring_after(name, ', '))])
        before_comma = substring_before(name, ', ')
        "#{after_comma} #{before_comma}".squish
      end
    end
  end
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# Parses Database Subject Category and Database Type local fields
|
5
|
+
class Database < Helper
|
6
|
+
# Database format type used to facet databases, found in
|
7
|
+
# {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
|
8
|
+
# local field 944} subfield 'a'.
|
9
|
+
DATABASES_FACET_VALUE = 'Database & Article Index'
|
10
|
+
# Penn Libraries' Community of Interest code used in
|
11
|
+
# {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
|
12
|
+
# local field 943} subfield '2'.
|
13
|
+
COI_CODE = 'penncoi'
|
14
|
+
|
15
|
+
class << self
|
16
|
+
# Retrieves database subtype (subfield 'b') from
|
17
|
+
# {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
|
18
|
+
# local field 944}. Only returns database subtype if Penn's Database facet value is present in subfield 'a'.
|
19
|
+
# @param [Marc::Record]
|
20
|
+
# @return [Array<string>] Array of types
|
21
|
+
def type(record)
|
22
|
+
record.fields('944').filter_map do |field|
|
23
|
+
# skip unless specified database format type present
|
24
|
+
next unless subfield_value?(field, 'a', /#{DATABASES_FACET_VALUE}/)
|
25
|
+
|
26
|
+
type = field.find { |subfield| subfield.code == 'b' }
|
27
|
+
type&.value
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
# Retrieves database subject category/communities of interest (subfield 'a') from
|
32
|
+
# {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
|
33
|
+
# local field 943}. Only returns database subject category if Penn's Community of Interest code is present in
|
34
|
+
# subfield '2'.
|
35
|
+
# @param [Marc::Record]
|
36
|
+
# @return [Array<string>] Array of categories
|
37
|
+
def db_category(record)
|
38
|
+
return [] unless curated_db?(record)
|
39
|
+
|
40
|
+
record.fields('943').filter_map do |field|
|
41
|
+
# skip unless Community of Interest code is in subfield '2'
|
42
|
+
next unless subfield_value?(field, '2', /#{COI_CODE}/)
|
43
|
+
|
44
|
+
category = field.find { |subfield| subfield.code == 'a' }
|
45
|
+
category&.value
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
# Concatenates database subject category with database sub subject category in the format "category--subcategory"
|
50
|
+
# if both values are present.
|
51
|
+
# Retrieves both values respectively from subfield 'a' and subfield 'b' of
|
52
|
+
# {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
|
53
|
+
# local field 943}. Only returns subcategory if Penn's Community of Interest code is present in subfield '2'.
|
54
|
+
# @note return value differs from legacy implementation. This version only returns ["category--subcategory"] or
|
55
|
+
# an empty array.
|
56
|
+
# @param [Marc::Record]
|
57
|
+
# @return [Array<string>] Array of "category--subcategory"
|
58
|
+
def db_subcategory(record)
|
59
|
+
return [] unless curated_db?(record)
|
60
|
+
|
61
|
+
record.fields('943').filter_map do |field|
|
62
|
+
# skip unless Community of Interest code is in subfield '2'
|
63
|
+
next unless subfield_value?(field, '2', /#{COI_CODE}/)
|
64
|
+
|
65
|
+
category = field.find { |subfield| subfield.code == 'a' }
|
66
|
+
|
67
|
+
# skip unless category is present
|
68
|
+
next unless category.present?
|
69
|
+
|
70
|
+
subcategory = field.find { |subfield| subfield.code == 'b' }
|
71
|
+
|
72
|
+
# skip unless subcategory is present
|
73
|
+
next unless subcategory.present?
|
74
|
+
|
75
|
+
"#{category.value}--#{subcategory.value}"
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
private
|
80
|
+
|
81
|
+
# Determines if Database format type is format type used to facet databases
|
82
|
+
# @param [Marc::Record]
|
83
|
+
# @return [TrueClass, FalseClass]
|
84
|
+
def curated_db?(record)
  # true as soon as one 944 field has a subfield 'a' matching the databases facet value
  format_fields = record.fields('944')
  format_fields.any? do |format_field|
    subfield_value?(format_field, 'a', /#{DATABASES_FACET_VALUE}/)
  end
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
  # Parser methods for extracting date info as DateTime objects
  class Date < Helper
    class << self
      # Retrieve publication date (Date 1) from {https://www.loc.gov/marc/bibliographic/bd008a.html 008 field}.
      # Publication date is a four-digit year found in position 7-10 and may contain 'u' characters to represent
      # partially known dates. We replace any occurrences of 'u' with '0' before converting to DateTime object.
      # @param [MARC::Record] record
      # @return [DateTime, nil] The publication date from the first 008 field yielding a usable year, or nil
      def publication(record)
        record.fields('008').filter_map do |field|
          # safe navigation so an 008 field without a value is skipped rather than raising
          four_digit_year = sanitize_partially_known_date(field.value&.slice(7, 4), '0')

          next unless four_digit_year.present?

          DateTime.new(four_digit_year.to_i)
        end.first
      end

      # Retrieve date added (subfield 'q') from enriched marc 'itm' field.
      # {PennMARC::EnrichedMarc} maps enriched marc fields and subfields created during Alma publishing.
      # When several item fields carry a date, the most recent one is returned.
      # @param [MARC::Record] record
      # @return [DateTime, nil] The date added, or nil if no parseable date is found in the record
      def added(record)
        record.fields(EnrichedMarc::TAG_ITEM).flat_map do |field|
          field.filter_map do |subfield|
            # Only the date created subfield holds the value we want. Checking the subfield's own code (rather
            # than whether the field merely contains such a subfield) keeps unrelated subfields of the same
            # item field from being run through the date parser.
            next unless subfield.code == EnrichedMarc::SUB_ITEM_DATE_CREATED

            # On 2022-05-02, this field value (as exported in enriched publishing
            # job from Alma) began truncating time to day-level granularity. We have
            # no guarantee that this won't switch back in the future, so for the
            # foreseeable future we should support both formats.
            format = subfield.value.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'

            DateTime.strptime(subfield.value, format)
          rescue StandardError => e
            # best effort: report the unparseable value and drop it from the candidates
            puts "Error parsing date in date added subfield: #{subfield.value} - #{e}"
            nil
          end
        end.max
      end

      # Retrieve date last updated from {https://www.loc.gov/marc/bibliographic/bd005.html 005 field}.
      # Date last updated is a sixteen character String recorded in
      # {https://www.iso.org/iso-8601-date-and-time-format.html ISO 8601} format.
      # @param [MARC::Record] record
      # @return [DateTime, nil] The date last updated, or nil if date found in record is invalid
      def last_updated(record)
        record.fields('005').filter_map do |field|
          date_time_string = field.value

          next if date_time_string.blank?

          # a value starting with year '0000' is treated as "no date recorded"
          next if date_time_string.start_with?('0000')

          DateTime.iso8601(date_time_string)
        rescue ArgumentError => e
          # best effort: report the unparseable value and move on to the next 005 field
          puts "Error parsing last updated date: #{date_time_string} - #{e}"
          nil
        end.first
      end

      private

      # Sanitizes a partially known date string by replacing any 'u' occurrences with a specified replacement value.
      # @param [String, nil] date The date string in '%Y' format, potentially containing 'u' characters.
      # @param [String] replacement The value with which to replace 'u' occurrences in the date string.
      # @return [String, nil] The sanitized date string with 'u' characters replaced by the replacement value,
      #  or nil if the date string does not match the expected format.
      def sanitize_partially_known_date(date, replacement)
        # Early return unless the whole string is zero or more digits followed by zero or more occurrences of 'u'.
        # \A and \z anchor the entire string; ^ and $ would accept a value whose first line merely looks valid.
        return unless /\A[0-9]*u*\z/.match?(date)

        # replace 'u' occurrences with the specified replacement value
        date.gsub('u', replacement)
      end
    end
  end
end
|