pennmarc 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +23 -0
- data/Gemfile.lock +119 -0
- data/README.md +82 -0
- data/legacy/indexer.rb +568 -0
- data/legacy/marc.rb +2964 -0
- data/legacy/test_file_output.json +49 -0
- data/lib/pennmarc/encoding_level.rb +43 -0
- data/lib/pennmarc/enriched_marc.rb +36 -0
- data/lib/pennmarc/heading_control.rb +11 -0
- data/lib/pennmarc/helpers/citation.rb +31 -0
- data/lib/pennmarc/helpers/creator.rb +237 -0
- data/lib/pennmarc/helpers/database.rb +89 -0
- data/lib/pennmarc/helpers/date.rb +85 -0
- data/lib/pennmarc/helpers/edition.rb +90 -0
- data/lib/pennmarc/helpers/format.rb +312 -0
- data/lib/pennmarc/helpers/genre.rb +71 -0
- data/lib/pennmarc/helpers/helper.rb +11 -0
- data/lib/pennmarc/helpers/identifier.rb +134 -0
- data/lib/pennmarc/helpers/language.rb +37 -0
- data/lib/pennmarc/helpers/link.rb +12 -0
- data/lib/pennmarc/helpers/location.rb +97 -0
- data/lib/pennmarc/helpers/note.rb +132 -0
- data/lib/pennmarc/helpers/production.rb +131 -0
- data/lib/pennmarc/helpers/relation.rb +135 -0
- data/lib/pennmarc/helpers/series.rb +118 -0
- data/lib/pennmarc/helpers/subject.rb +304 -0
- data/lib/pennmarc/helpers/title.rb +197 -0
- data/lib/pennmarc/mappings/language.yml +516 -0
- data/lib/pennmarc/mappings/locations.yml +1801 -0
- data/lib/pennmarc/mappings/relator.yml +263 -0
- data/lib/pennmarc/parser.rb +177 -0
- data/lib/pennmarc/util.rb +240 -0
- data/lib/pennmarc.rb +6 -0
- data/pennmarc.gemspec +22 -0
- data/spec/fixtures/marcxml/test.xml +167 -0
- data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
- data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
- data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
- data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
- data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
- data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
- data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
- data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
- data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
- data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
- data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
- data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
- data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
- data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
- data/spec/lib/pennmarc/parser_spec.rb +13 -0
- data/spec/spec_helper.rb +104 -0
- data/spec/support/marc_spec_helpers.rb +84 -0
- metadata +171 -0
@@ -0,0 +1,49 @@
|
|
1
|
+
{ "id":["FRANKLIN_9910148543503681"],
|
2
|
+
"grouped_id":["18521155!FRANKLIN_9910148543503681"],
|
3
|
+
"record_source_id":[1],
|
4
|
+
"record_source_f":["Penn"],
|
5
|
+
"nocirc_f_stored":["none"],
|
6
|
+
"alma_mms_id":["9910148543503681"],
|
7
|
+
"oclc_id":["18521155"],
|
8
|
+
"cluster_id":["18521155"],
|
9
|
+
"marcrecord_xml_stored_single_large":["<record><leader>01908cam a2200469 a 4500</leader><controlfield tag=\"005\">20220609191957.0</controlfield><controlfield tag=\"008\">890224t19891989nyua 001 0 eng </controlfield><controlfield tag=\"001\">9910148543503681</controlfield><datafield tag=\"010\" ind1=\" \" ind2=\" \"><subfield code=\"a\"> 88029825 </subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(OCoLC)ocm18521155</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(OCoLC)18521155</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(CStRLIN)PAUG89-B7246</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(CaOTULAS)185188489</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"9\">AHA6856</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">1014854</subfield></datafield><datafield tag=\"035\" ind1=\" \" ind2=\" \"><subfield code=\"a\">(PU)1014854-penndb-Voyager</subfield></datafield><datafield tag=\"040\" ind1=\" \" ind2=\" \"><subfield code=\"b\">eng</subfield><subfield code=\"d\">CSt-B</subfield></datafield><datafield tag=\"043\" ind1=\" \" ind2=\" \"><subfield code=\"a\">n-us---</subfield></datafield><datafield tag=\"050\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">KF6450</subfield><subfield code=\"b\">.C59 1989</subfield></datafield><datafield tag=\"082\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">343.7306/8</subfield><subfield code=\"a\">347.30368</subfield><subfield code=\"2\">19</subfield></datafield><datafield tag=\"090\" ind1=\" \" ind2=\" \"><subfield code=\"a\">KF6450</subfield><subfield code=\"b\">.C59 1989</subfield><subfield code=\"i\">11/30/89 CTZ</subfield></datafield><datafield tag=\"245\" ind1=\"0\" ind2=\"4\"><subfield code=\"a\">The Coopers & Lybrand guide to business tax strategies and planning /</subfield><subfield code=\"c\">by the 
partners of Coopers & Lybrand.</subfield></datafield><datafield tag=\"264\" ind1=\" \" ind2=\"1\"><subfield code=\"a\">New York :</subfield><subfield code=\"b\">Simon and Schuster,</subfield><subfield code=\"c\">[1989]</subfield></datafield><datafield tag=\"264\" ind1=\" \" ind2=\"4\"><subfield code=\"c\">©1989</subfield></datafield><datafield tag=\"300\" ind1=\" \" ind2=\" \"><subfield code=\"a\">x, 198 pages :</subfield><subfield code=\"b\">illustrations ;</subfield><subfield code=\"c\">24 cm</subfield></datafield><datafield tag=\"336\" ind1=\" \" ind2=\" \"><subfield code=\"a\">text</subfield><subfield code=\"b\">txt</subfield><subfield code=\"2\">rdacontent</subfield></datafield><datafield tag=\"337\" ind1=\" \" ind2=\" \"><subfield code=\"a\">unmediated</subfield><subfield code=\"b\">n</subfield><subfield code=\"2\">rdamedia</subfield></datafield><datafield tag=\"338\" ind1=\" \" ind2=\" \"><subfield code=\"a\">volume</subfield><subfield code=\"b\">nc</subfield><subfield code=\"2\">rdacarrier</subfield></datafield><datafield tag=\"500\" ind1=\" \" ind2=\" \"><subfield code=\"a\">Edited by Jonathan J. 
Davies and others.</subfield></datafield><datafield tag=\"650\" ind1=\" \" ind2=\"0\"><subfield code=\"a\">Business enterprises</subfield><subfield code=\"x\">Taxation</subfield><subfield code=\"z\">United States.</subfield><subfield code=\"0\">http://id.loc.gov/authorities/subjects/sh2009118044</subfield></datafield><datafield tag=\"650\" ind1=\" \" ind2=\"7\"><subfield code=\"a\">Business enterprises</subfield><subfield code=\"x\">Taxation.</subfield><subfield code=\"2\">fast</subfield><subfield code=\"0\">http://id.worldcat.org/fast/842634</subfield></datafield><datafield tag=\"651\" ind1=\" \" ind2=\"7\"><subfield code=\"a\">United States.</subfield><subfield code=\"2\">fast</subfield><subfield code=\"0\">http://id.worldcat.org/fast/1204155</subfield></datafield><datafield tag=\"650\" ind1=\" \" ind2=\"0\"><subfield code=\"a\">Tax planning</subfield><subfield code=\"z\">United States.</subfield><subfield code=\"0\">http://id.loc.gov/authorities/subjects/sh2008112546</subfield></datafield><datafield tag=\"650\" ind1=\" \" ind2=\"7\"><subfield code=\"a\">Tax planning.</subfield><subfield code=\"2\">fast</subfield><subfield code=\"0\">http://id.worldcat.org/fast/1143815</subfield></datafield><datafield tag=\"700\" ind1=\"1\" ind2=\" \"><subfield code=\"a\">Davies, Jonathan J.</subfield><subfield code=\"0\">http://id.loc.gov/authorities/names/n82126416</subfield></datafield><datafield tag=\"710\" ind1=\"2\" ind2=\" \"><subfield code=\"a\">Coopers & Lybrand.</subfield><subfield code=\"0\">http://id.loc.gov/authorities/names/n79063025</subfield></datafield><datafield tag=\"740\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">Guide to business tax strategies and planning.</subfield></datafield><datafield tag=\"740\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">Business tax strategies and planning</subfield></datafield><datafield tag=\"740\" ind1=\"0\" ind2=\" \"><subfield code=\"a\">Tax strategies and planning.</subfield></datafield><datafield tag=\"902\" ind1=\" \" ind2=\" 
\"><subfield code=\"a\">MARCIVE 2022</subfield></datafield><datafield tag=\"950\" ind1=\" \" ind2=\" \"><subfield code=\"l\">LIPP</subfield><subfield code=\"i\">11/30/89 C</subfield></datafield><datafield tag=\"955\" ind1=\" \" ind2=\" \"><subfield code=\"l\">LIPP</subfield><subfield code=\"c\">1</subfield><subfield code=\"q\">89-B7246-1</subfield><subfield code=\"r\">[01043 6272]</subfield><subfield code=\"i\">11/30/89 C</subfield></datafield></record>"],
|
10
|
+
"access_f_stored":["At the library"],
|
11
|
+
"format_f_stored":["Book"],
|
12
|
+
"author_creator_xfacet2_input":["nDavies, Jonathan J.","nCoopers & Lybrand"],
|
13
|
+
"subject_search":["Business enterprises Taxation United States. http://id.loc.gov/authorities/subjects/sh2009118044","Business enterprises Taxation. fast http://id.worldcat.org/fast/842634","United States. fast http://id.worldcat.org/fast/1204155","Tax planning United States. http://id.loc.gov/authorities/subjects/sh2008112546","Tax planning. fast http://id.worldcat.org/fast/1143815"],
|
14
|
+
"toplevel_subject_f":["Business enterprises","Business enterprises","United States","Tax planning","Tax planning"],
|
15
|
+
"call_number_xfacet":["{\"raw\":\"KF6450 .C59 1989\"}"],
|
16
|
+
"language_f_stored":["English"],
|
17
|
+
"language_search":["English"],
|
18
|
+
"library_f_stored":["LIBRA"],
|
19
|
+
"specific_location_f_stored":["LIBRA"],
|
20
|
+
"classification_f_stored":["K - Law"],
|
21
|
+
"title_1_search":["The Coopers & Lybrand guide to business tax strategies and planning /"],
|
22
|
+
"title_2_search":["The Coopers & Lybrand guide to business tax strategies and planning /","Guide to business tax strategies and planning.","Business tax strategies and planning","Tax strategies and planning."],
|
23
|
+
"author_creator_2_search":["Davies, Jonathan J. http://id.loc.gov/authorities/names/n82126416","Jonathan J. Davies http://id.loc.gov/authorities/names/n82126416","Coopers & Lybrand. http://id.loc.gov/authorities/names/n79063025","http://id.loc.gov/authorities/names/n79063025"],
|
24
|
+
"title":["The Coopers & Lybrand guide to business tax strategies and planning"],
|
25
|
+
"title_xfacet":["{\"raw\":{\"prefix\":\"The \",\"filing\":\"Coopers & Lybrand guide to business tax strategies and planning / \"}}"],
|
26
|
+
"title_nssort":["Coopers & Lybrand guide to business tax strategies and planning / The "],
|
27
|
+
"title_sort_tl":["Coopers & Lybrand guide to business tax strategies and planning / "],
|
28
|
+
"publication_a":["New York : Simon and Schuster, [1989] , ©1989"],
|
29
|
+
"elvl_rank_isort":[0],
|
30
|
+
"hld_count_isort":[1],
|
31
|
+
"itm_count_isort":[1],
|
32
|
+
"empty_hld_count_isort":[0],
|
33
|
+
"subject_xfacet2_input":["sBusiness enterprises--Taxation--United States","{\"val\":\"Business enterprises--Taxation\",\"prefix\":\"f\"}","{\"val\":\"United States\",\"prefix\":\"f\"}","sTax planning--United States","{\"val\":\"Tax planning\",\"prefix\":\"f\"}"],
|
34
|
+
"recently_added_isort":[1496742800],
|
35
|
+
"last_update_isort":[1654802397],
|
36
|
+
"publication_date_ssort":["1989"],
|
37
|
+
"pub_min_dtsort":["1989-01-01T00:00:00Z"],
|
38
|
+
"pub_max_dtsort":["1990-01-01T00:00:00Z"],
|
39
|
+
"content_min_dtsort":["1989-01-01T00:00:00Z"],
|
40
|
+
"content_max_dtsort":["1990-01-01T00:00:00Z"],
|
41
|
+
"publication_date_f_stored":["1980s"],
|
42
|
+
"publication_dr":["[1989 TO 1989]"],
|
43
|
+
"content_dr":["[1989 TO 1989]"],
|
44
|
+
"call_number_search":["KF6450 .C59 1989"],
|
45
|
+
"physical_holdings_json":["[{\"holding_id\":\"22418068440003681\",\"location\":\"stor\",\"classification_part\":\"KF6450\",\"item_part\":\".C59 1989\"}]"],
|
46
|
+
"corporate_author_search":["Coopers & Lybrand."],
|
47
|
+
"place_of_publication_search":["New York :"],
|
48
|
+
"publisher_search":["Simon and Schuster,"]
|
49
|
+
}
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# MARC encoding level
|
4
|
+
# See: https://www.oclc.org/bibformats/en/fixedfield/elvl.html
|
5
|
+
# Not sure how this is used
|
6
|
+
module PennMARC
  # Encoding level codes and a relative ranking of them. The code values come from the MARC leader definition and
  # from OCLC's ELvl extensions (see the links on each group below).
  module EncodingLevel
    # Official MARC codes (https://www.loc.gov/marc/bibliographic/bdleader.html)
    FULL = ' '
    FULL_NOT_EXAMINED = '1'
    UNFULL_NOT_EXAMINED = '2'
    ABBREVIATED = '3'
    CORE = '4'
    PRELIMINARY = '5'
    MINIMAL = '7'
    PREPUBLICATION = '8'
    UNKNOWN = 'u'
    NOT_APPLICABLE = 'z'

    # OCLC extension codes (https://www.oclc.org/bibformats/en/fixedfield/elvl.html)
    OCLC_FULL = 'I'
    OCLC_MINIMAL = 'K'
    OCLC_BATCH_LEGACY = 'L'
    OCLC_BATCH = 'M'
    OCLC_SOURCE_DELETED = 'J'

    # Maps an encoding level code to an Integer rank. PREPUBLICATION, UNKNOWN and NOT_APPLICABLE have no entry, so a
    # lookup for those codes (or for any unlisted code) returns nil.
    RANK = {
      # top 4 (per nelsonrr), do not differentiate among "good" records: all four share rank 0, which is why the
      # next rank in the table is 4
      FULL => 0,
      FULL_NOT_EXAMINED => 0,
      OCLC_FULL => 0,
      CORE => 0,
      UNFULL_NOT_EXAMINED => 4,
      ABBREVIATED => 5,
      PRELIMINARY => 6,
      MINIMAL => 7,
      OCLC_MINIMAL => 8,
      OCLC_BATCH => 9,
      OCLC_BATCH_LEGACY => 10,
      OCLC_SOURCE_DELETED => 11
    }.freeze
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Constants for Alma's MARC enrichment
|
4
|
+
# MARC enrichment is performed during the Alma Publishing process
|
5
|
+
# @see https://developers.exlibrisgroup.com/alma/apis/docs/bibs/R0VUIC9hbG1hd3MvdjEvYmlicy97bW1zX2lkfQ==/
|
6
|
+
# Alma documentation for these added fields
|
7
|
+
module PennMARC
  # Tag and subfield code constants for the fields Alma adds to a record when it is published.
  module EnrichedMarc
    # terminology follows the Publishing Profile screen
    TAG_HOLDING = 'hld'
    TAG_ITEM = 'itm'
    TAG_ELECTRONIC_INVENTORY = 'prt'
    TAG_DIGITAL_INVENTORY = 'dig'

    # these are 852 subfield codes; terminology comes from MARC spec
    SUB_HOLDING_SHELVING_LOCATION = 'c'
    SUB_HOLDING_SEQUENCE_NUMBER = '8'
    SUB_HOLDING_CLASSIFICATION_PART = 'h'
    SUB_HOLDING_ITEM_PART = 'i'

    # item-level subfield codes (going by the constant names, these belong to the TAG_ITEM field)
    SUB_ITEM_CURRENT_LOCATION = 'g'
    SUB_ITEM_CALL_NUMBER_TYPE = 'h'
    SUB_ITEM_CALL_NUMBER = 'i'
    SUB_ITEM_DATE_CREATED = 'q'

    # electronic inventory subfield codes (going by the constant names, these belong to the
    # TAG_ELECTRONIC_INVENTORY field)
    SUB_ELEC_PORTFOLIO_PID = 'a'
    SUB_ELEC_ACCESS_URL = 'b'
    SUB_ELEC_COLLECTION_NAME = 'c'
    SUB_ELEC_COVERAGE = 'g'

    # TODO: evaluate this in context of changed boundwiths processing
    # a subfield code NOT used by the MARC 21 spec for 852 holdings records.
    # we add this subfield during preprocessing to store boundwith record IDs.
    SUB_BOUND_WITH_ID = 'y'
  end
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
  # Shared values for controlling inclusion of subject or genre headings
  module HeadingControl
    # Source codes whose headings we are willing to display. These codes are expected to be found in sf2 when the
    # indicator2 value is 7, indicating "source specified". Headings from any source not listed here are ones we
    # don't want to display.
    ALLOWED_SOURCE_CODES = %w[
      aat cct fast ftamc gmgpc gsafd homoit jlabsh lcgft lcsh lcstt lctgm
      local/osu mesh ndlsh nlksh rbbin rbgenr rbmscv rbpap rbpri rbprov rbpub rbtyp
    ].freeze
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
  # Parser methods for citation notes: where the described item has been cited (510) and how the custodian prefers
  # it to be cited (524).
  class Citation < Helper
    class << self
      # Field 510 contains Citations or references to published bibliographic descriptions,
      # reviews, abstracts, or indexes of the content of the described item. Used to specify where an item has been
      # cited or reviewed. Citations or references may be given in a brief form (i.e., using generally recognizable
      # abbreviations, etc.). The actual text of a published description is not recorded in field 510 but rather in
      # field 520 (Summary, Etc. Note).
      # https://www.loc.gov/marc/bibliographic/bd510.html
      # @param [MARC::Record] record
      # @return [Array] array of citations and any linked alternates
      def cited_in_show(record)
        datafield_and_linked_alternate(record, '510')
      end

      # Field 524 is the Preferred Citation of Described Materials Note. It is the Format for the citation of the
      # described materials that is preferred by the custodian. When multiple citation formats exist for the same item,
      # each is recorded in a separate occurrence of field 524. The note is sometimes displayed and/or printed with an
      # introductory phrase that is generated as a display constant based on the first indicator value.
      # https://www.loc.gov/marc/bibliographic/bd524.html
      # @param [MARC::Record] record
      # @return [Array] array of citation of described materials note and any linked alternates
      def cite_as_show(record)
        datafield_and_linked_alternate(record, '524')
      end
    end
  end
end
|
@@ -0,0 +1,237 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# Do Creator & Author field processing. Main methods pull from 100 and 110 fields. Display methods here no longer
|
5
|
+
# return data structures intended for generating "search" links, but some of the split subfield parsing remains from
|
6
|
+
# ported methods in case we need to replicate that functionality.
|
7
|
+
# @todo can there ever be multiple 100 fields?
|
8
|
+
# can ǂe and ǂ4 both be used at the same time? seems to result in duplicate values
|
9
|
+
class Creator < Helper
|
10
|
+
class << self
|
11
|
+
# Main tags for Author/Creator information
TAGS = %w[100 110].freeze

# Aux tags for Author/Creator information, for use in search_aux method
AUX_TAGS = %w[100 110 111 400 410 411 700 710 711 800 810 811].freeze
|
15
|
+
|
16
|
+
# Author/Creator search field. Includes all subfield values (even ǂ0 URIs) from
# {https://www.oclc.org/bibformats/en/1xx/100.html 100 Main Entry--Personal Name} and
# {https://www.oclc.org/bibformats/en/1xx/110.html 110 Main Entry--Corporate Name}. Maps any relator codes found
# in ǂ4. To better handle name searches, returns names as both "First Last" and "Last, First" if a comma is found
# in ǂa. Also indexes any linked values in the 880. Some of the search fields remain incomplete and may need to be
# further investigated and ported when search result relevancy is considered.
# @todo this seems bad - why include relator labels? URIs? punctuation? leaving mostly as-is for now,
#       but this should be reexamined in the relevancy-tuning phase. URIs should def be removed. and shouldn't
#       indicator1 tell us the order of the name?
# @note ported from get_author_creator_1_search_values
# @param [MARC::Record] record
# @param [Hash] relator_mapping
# @return [Array<String>] array of author/creator values for indexing
def search(record, relator_mapping)
  # every 1xx value is indexed with terminal punctuation: keep a trailing '.' or '-', otherwise add a period
  punctuate = ->(value) { value.end_with?('.', '-') ? value : "#{value}." }
  # label for a ǂ4 relator code; nil when nothing usable is mapped, so that filter_map drops it
  relator_for = ->(subfield) { translate_relator(subfield.value, relator_mapping).presence }

  main_fields = record.fields(TAGS)

  # first pass: ǂa is flipped to "First Last" order; ǂ1, ǂ6 and ǂ8 are left out
  reordered = main_fields.map do |field|
    pieces = field.filter_map do |sf|
      case sf.code
      when 'a' then convert_name_order(sf.value)
      when '4' then relator_for.call(sf)
      when '1', '6', '8' then nil
      else sf.value
      end
    end
    punctuate.call(join_and_squish(pieces))
  end

  # second pass over the same fields: names are kept as entered; only ǂ6 and ǂ8 are left out (ǂ1 is kept here)
  as_entered = main_fields.map do |field|
    pieces = field.filter_map do |sf|
      case sf.code
      when '4' then relator_for.call(sf)
      when '6', '8' then nil
      else sf.value
      end
    end
    punctuate.call(join_and_squish(pieces))
  end

  # linked 880 values: flipped ǂa followed by the remaining subfields; no terminal punctuation is added here
  # NOTE(review): ǂ6 is compared for exact equality with the tag. MARC linkage values usually also carry an
  # occurrence number (e.g. "100-01") - confirm that ǂ6 is normalized before records reach this method.
  linked = record.fields(%w[880]).filter_map do |field|
    next unless field.any? { |sf| sf.code == '6' && sf.value.in?(TAGS) }

    suba = field.find_all(&subfield_in?(%w[a])).map { |sf| convert_name_order(sf.value) }.first
    oth = join_and_squish(field.find_all(&subfield_not_in?(%w[6 8 a t])).map(&:value))
    join_and_squish [suba, oth]
  end

  (reordered + as_entered + linked).uniq
end
|
80
|
+
|
81
|
+
# Auxiliary Author/Creator search field
|
82
|
+
# @note ported from get_author_creator_2_search_values
|
83
|
+
# @todo port this later
|
84
|
+
# @param [MARC::Record] record
|
85
|
+
# @return [Array<String>] array of extended author/creator values for indexing
|
86
|
+
def search_aux(record)
  # not ported yet: there is no body, so this currently returns nil for every record
end
|
87
|
+
|
88
|
+
# All author/creator values for display (like #show, but multivalued?) - no 880 linkage. One value is built per
# 100/110 field, with relator codes mapped to labels.
# @note ported from get_author_creator_values (indexed as author_creator_a) - shown on results page
# @param [MARC::Record] record
# @param [Hash] relator_mapping
# @return [Array<String>] array of author/creator values for display
def values(record, relator_mapping)
  record.fields(TAGS).map { |main_entry| name_from_main_entry(main_entry, relator_mapping) }
end
|
98
|
+
|
99
|
+
# Author/Creator values for display. Uses the 100/110 fields followed by any 880 field whose ǂ6 points at one of
# those tags, and joins every subfield except 0, 1, 4, 6, 8, e and w.
# @todo ported from get_author_display - used on record show page. porting did not include 4, e or w values,
#       which were part of the link object as 'append' values in franklin
# @param [MARC::Record] record
# @return [Array<String>] array of author/creator values for display
def show(record)
  main_entries = record.fields(TAGS)
  linked_entries = record.fields('880').select { |field| subfield_value_in?(field, '6', TAGS) }
  displayed = subfield_not_in?(%w[0 1 4 6 8 e w])
  (main_entries + linked_entries).filter_map { |field| join_subfields(field, &displayed) }
end
|
111
|
+
|
112
|
+
# Author/Creator sort. Does not map and include any relator
# codes. Only the first 100/110 field is used.
# @todo This includes any URI from ǂ0 which could help to disambiguate in sorts, but ǂ1 is excluded...
# @note ported from get_author_creator_sort_values
# @param [MARC::Record] record
# @return [String] string with author/creator value for sorting
def sort(record)
  # NOTE(review): first_entry is nil when the record has no 100/110 field - confirm join_subfields accepts nil
  first_entry = record.fields(TAGS).first
  sortable = subfield_not_in?(%w[1 4 6 8 e])
  join_subfields(first_entry, &sortable)
end
|
122
|
+
|
123
|
+
# Author/Creator for faceting. Grabs values from a plethora of fields, joins defined subfields, then trims some
# punctuation (@see trim_punctuation). Values come out grouped by tag, in the order 100, 110, 111, 700, 710, 711,
# 800, 810, 811.
# @todo should trim_punctuation apply to each subfield value, or the joined values? i think the joined values
# @note ported from author_creator_xfacet2_input - is this the best choice? check the copyField declarations -
#       franklin uses author_creator_f
# @param [MARC::Record] record
# @return [Array<String>] array of author/creator values for faceting
def facet(record)
  name_codes = %w[a b c d j q]
  meeting_codes = %w[a b c e n]
  # the x11 tags use the second subfield list, every other tag uses the first
  codes_by_tag = %w[100 110 111 700 710 711 800 810 811].to_h do |tag|
    [tag, tag.end_with?('11') ? meeting_codes : name_codes]
  end

  codes_by_tag.flat_map do |tag, codes|
    record.fields(tag).map { |field| trim_punctuation(join_subfields(field, &subfield_in?(codes))) }
  end
end
|
142
|
+
|
143
|
+
# Conference for display, intended for results display. One value is built per 111 field, with relator codes
# mapped to labels.
# @note ported from get_conference_values
# @param [MARC::Record] record
# @param [Hash] relator_map
# @return [Array<String>] array of conference values
def conference_show(record, relator_map)
  record.fields('111').filter_map { |meeting| name_from_main_entry(meeting, relator_map) }
end
|
153
|
+
|
154
|
+
# Conference detailed display, intended for record show page. Values from 111/711 fields with a blank second
# indicator come first, followed by values from 880 fields whose ǂ6 points at 111 or 711.
# @note ported from get_conference_values
# @todo what is ǂi for?
# @param [MARC::Record] record
# @return [Array<String>] array of conference values
def conference_detail_show(record)
  excluded_from_name = %w[0 4 5 6 8 e j w]

  direct = record.fields(%w[111 711]).filter_map do |field|
    next unless field.indicator2.in? ['', ' ']

    # a field carrying ǂi contributes no name part, only its e/j/w values
    name = subfield_undefined?(field, 'i') ? join_subfields(field, &subfield_not_in?(excluded_from_name)) : ''
    extra = join_subfields(field, &subfield_in?(%w[e j w]))
    join_and_squish([name, extra].compact_blank)
  end

  linked = record.fields('880').filter_map do |field|
    next unless subfield_value_in?(field, '6', %w[111 711])
    # unlike the fields above, a linked field carrying ǂi is skipped entirely
    next if subfield_defined?(field, 'i')

    name = join_subfields(field, &subfield_not_in?(excluded_from_name))
    extra = join_subfields(field, &subfield_in?(%w[4 e j w]))
    join_and_squish([name, extra])
  end

  direct + linked
end
|
181
|
+
|
182
|
+
# @todo this supports "Conference" fielded search and may not be needed
|
183
|
+
# @note see get_conference_search_values
|
184
|
+
def conference_search(record)
  # not ported yet: there is no body, so this currently returns nil for every record
end
|
185
|
+
|
186
|
+
private
|
187
|
+
|
188
|
+
# Trim punctuation method extracted from Traject macro, to ensure consistent output
# @todo move to Util?
# @param [String] string
# @return [String] string with relevant punctuation removed; nil (or false) is handed back unchanged
def trim_punctuation(string)
  return string unless string

  # trailing comma, slash, semicolon or colon (possibly preceded and followed by whitespace)
  trimmed = string.sub(%r{ *[ ,/;:] *\Z}, '')

  # trailing period if it is preceded by at least three letters (possibly preceded and followed by whitespace),
  # so that a period after an initial such as "J." is kept
  trimmed = trimmed.sub(/( *[[:word:]]{3,})\. *\Z/, '\1')

  # single square bracket characters if they are the start and/or end chars and there are no internal square
  # brackets.
  trimmed = trimmed.sub(/\A\[?([^\[\]]+)\]?\Z/, '\1')

  # trim any leading or trailing whitespace
  trimmed.strip
end
|
207
|
+
|
208
|
+
# Extract the information we care about from 1xx fields, map relator codes, and use appropriate punctuation.
# Subfields 0, 1, 6 and 8 are dropped, a ǂ4 code is replaced by ", <relator label>" when a label is found, and a
# period is appended unless the joined value already ends in '.' or '-'.
# @param [MARC::Field] field
# @param [Hash] mapping relator code mapping handed to translate_relator
# @return [String] joined subfield values for value from field
def name_from_main_entry(field, mapping)
  parts = field.filter_map do |sf|
    if sf.code == '4'
      relator = translate_relator(sf.value, mapping)
      ", #{relator}" unless relator.blank?
    elsif %w[0 1 6 8].exclude?(sf.code)
      " #{sf.value}"
    end
  end

  name = parts.join
  name += '.' unless %w[. -].member?(name.last)
  name.squish
end
|
224
|
+
|
225
|
+
# Convert "Lastname, First" to "First Lastname". A name without a comma is returned untouched.
# @param [String] name value for processing
# @return [String]
def convert_name_order(name)
  return name unless name.include? ','

  forenames = join_and_squish([trim_trailing(:comma, substring_after(name, ', '))])
  surname = substring_before(name, ', ')
  [forenames, ' ', surname].join.squish
end
|
235
|
+
end
|
236
|
+
end
|
237
|
+
end
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
|
4
|
+
# Parses Database Subject Category and Database Type local fields
|
5
|
+
class Database < Helper
|
6
|
+
# Database format type used to facet databases, found in
# {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
# local field 944} subfield 'a'.
DATABASES_FACET_VALUE = 'Database & Article Index'

# Penn Libraries' Community of Interest code used in
# {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
# local field 943} subfield '2'.
COI_CODE = 'penncoi'
|
14
|
+
|
15
|
+
class << self
|
16
|
+
# Retrieves database subtype (subfield 'b') from
# {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
# local field 944}. Only returns database subtype if Penn's Database facet value is present in subfield 'a'.
# @param [MARC::Record] record
# @return [Array<String>] Array of types
def type(record)
  # built once per call instead of once per 944 field
  database_format = /#{DATABASES_FACET_VALUE}/

  record.fields('944').filter_map do |field|
    # skip unless specified database format type present
    next unless subfield_value?(field, 'a', database_format)

    # a matching field without subfield 'b' yields nil, which filter_map drops
    field.find { |subfield| subfield.code == 'b' }&.value
  end
end
|
30
|
+
|
31
|
+
# Retrieves database subject category/communities of interest (subfield 'a') from
# {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
# local field 943}. Only returns database subject category if Penn's Community of Interest code is present in
# subfield '2'. Records that are not curated databases (see curated_db?) always yield an empty array.
# @param [MARC::Record] record
# @return [Array<String>] Array of categories
def db_category(record)
  return [] unless curated_db?(record)

  # built once per call instead of once per 943 field
  community_of_interest = /#{COI_CODE}/

  record.fields('943').filter_map do |field|
    # skip unless Community of Interest code is in subfield '2'
    next unless subfield_value?(field, '2', community_of_interest)

    # a matching field without subfield 'a' yields nil, which filter_map drops
    field.find { |subfield| subfield.code == 'a' }&.value
  end
end
|
48
|
+
|
49
|
+
# Concatenates database subject category with database sub subject category in the format "category--subcategory"
# if both values are present.
# Retrieves both values respectively from subfield 'a' and subfield 'b' of
# {https://upennlibrary.atlassian.net/wiki/spaces/ALMA/pages/323912493/Local+9XX+Field+Use+in+Almalocal
# local field 943}. Only returns subcategory if Penn's Community of Interest code is present in subfield '2'.
# Records that are not curated databases (see curated_db?) always yield an empty array.
# @note return value differs from legacy implementation. This version only returns ["category--subcategory"] or
#       an empty array.
# @param [MARC::Record] record
# @return [Array<String>] Array of "category--subcategory"
def db_subcategory(record)
  return [] unless curated_db?(record)

  # built once per call instead of once per 943 field
  community_of_interest = /#{COI_CODE}/

  record.fields('943').filter_map do |field|
    # skip unless Community of Interest code is in subfield '2'
    next unless subfield_value?(field, '2', community_of_interest)

    category = field.find { |subfield| subfield.code == 'a' }&.value
    subcategory = field.find { |subfield| subfield.code == 'b' }&.value

    # skip unless both values are present. The check is made on the subfield values rather than on the subfield
    # objects, so a subfield that exists but is empty no longer produces "--subcategory" or "category--"
    next if category.blank? || subcategory.blank?

    "#{category}--#{subcategory}"
  end
end
|
78
|
+
|
79
|
+
private
|
80
|
+
|
81
|
+
# Determines if Database format type is format type used to facet databases
|
82
|
+
# @param [Marc::Record]
|
83
|
+
# @return [TrueClass, FalseClass]
|
84
|
+
def curated_db?(record)
  # Scan every 944 field; the first one whose subfield 'a' matches the
  # databases facet value is enough to answer true.
  record.fields('944').each do |format_field|
    return true if subfield_value?(format_field, 'a', /#{DATABASES_FACET_VALUE}/)
  end

  false
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module PennMARC
  # Parser methods for extracting date info as DateTime objects
  class Date < Helper
    class << self
      # Retrieve publication date (Date 1) from {https://www.loc.gov/marc/bibliographic/bd008a.html 008 field}.
      # Publication date is a four-digit year found in position 7-10 and may contain 'u' characters to represent
      # partially known dates. We replace any occurrences of 'u' with '0' before converting to DateTime object.
      # Only the first 008 field that yields a usable year is considered.
      # @param [MARC::Record] record
      # @return [DateTime, nil] The publication date, or nil if date found in record is invalid
      def publication(record)
        record.fields('008').filter_map do |field|
          four_digit_year = sanitize_partially_known_date(field.value[7, 4], '0')

          # nil (unexpected format) and empty strings are both skipped here
          next unless four_digit_year.present?

          DateTime.new(four_digit_year.to_i)
        end.first
      end

      # Retrieve date added (subfield 'q') from enriched marc 'itm' field.
      # {PennMARC::EnrichedMarc} maps enriched marc fields and subfields created during Alma publishing.
      # When several item fields carry a parseable date, the most recent one is returned.
      # @param [MARC::Record] record
      # @return [DateTime, nil] The date added, or nil if date found in record is invalid
      def added(record)
        record.fields(EnrichedMarc::TAG_ITEM).flat_map do |field|
          field.filter_map do |subfield|
            # Only the date created subfield itself holds the date. Checking the subfield's own code (rather
            # than whether the field merely contains such a subfield) avoids trying to parse every other
            # subfield of the item field as a date.
            next unless subfield.code == EnrichedMarc::SUB_ITEM_DATE_CREATED.to_s

            # On 2022-05-02, this field value (as exported in enriched publishing
            # job from Alma) began truncating time to day-level granularity. We have
            # no guarantee that this won't switch back in the future, so for the
            # foreseeable future we should support both formats.
            format = subfield.value.size == 10 ? '%Y-%m-%d' : '%Y-%m-%d %H:%M:%S'

            DateTime.strptime(subfield.value, format)
          rescue StandardError => e
            # Best effort: report the unparseable value and drop it from the candidates
            puts "Error parsing date in date added subfield: #{subfield.value} - #{e}"
            nil
          end
        end.max
      end

      # Retrieve date last updated from {https://www.loc.gov/marc/bibliographic/bd005.html 005 field}.
      # Date last updated is a sixteen character String recorded in
      # {https://www.iso.org/iso-8601-date-and-time-format.html ISO 8601} format.
      # Only the first 005 field that yields a parseable value is considered.
      # @param [MARC::Record] record
      # @return [DateTime, nil] The date last updated, or nil if date found in record is invalid
      def last_updated(record)
        record.fields('005').filter_map do |field|
          date_time_string = field.value

          next if date_time_string.blank?

          # values beginning with '0000' are skipped rather than parsed
          next if date_time_string.start_with?('0000')

          # DateTime.iso8601 already returns a DateTime, so no further conversion is needed
          DateTime.iso8601(date_time_string)
        rescue ArgumentError => e
          puts "Error parsing last updated date: #{date_time_string} - #{e}"
          nil
        end.first
      end

      private

      # Sanitizes a partially known date string by replacing any 'u' occurrences with a specified replacement value.
      # @param [String] date The date string in '%Y' format, potentially containing 'u' characters.
      # @param [String] replacement The value with which to replace 'u' occurrences in the date string.
      # @return [String, nil] The sanitized date string with 'u' characters replaced by the replacement value,
      #   or nil if the date string does not match the expected format.
      def sanitize_partially_known_date(date, replacement)
        # early return unless the whole string is zero or more digits followed by zero or more occurrences of 'u'.
        # \A and \z anchor the entire string; ^ and $ would also accept a match on a single line of a
        # multi-line value.
        return unless /\A[0-9]*u*\z/.match?(date)

        # replace 'u' occurrences with the specified replacement value
        date.gsub('u', replacement)
      end
    end
  end
end
|