berkeley_library-marc 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/build.yml +18 -0
  3. data/.gitignore +388 -0
  4. data/.idea/inspectionProfiles/Project_Default.xml +20 -0
  5. data/.idea/marc.iml +101 -0
  6. data/.idea/misc.xml +4 -0
  7. data/.idea/modules.xml +8 -0
  8. data/.idea/vcs.xml +6 -0
  9. data/.rubocop.yml +334 -0
  10. data/.ruby-version +1 -0
  11. data/.simplecov +8 -0
  12. data/.yardopts +1 -0
  13. data/CHANGES.md +12 -0
  14. data/Dockerfile +57 -0
  15. data/Gemfile +3 -0
  16. data/Jenkinsfile +18 -0
  17. data/LICENSE.md +21 -0
  18. data/README.md +4 -0
  19. data/Rakefile +20 -0
  20. data/berkeley_library-marc.gemspec +42 -0
  21. data/docker-compose.yml +15 -0
  22. data/lib/.rubocop.yml +6 -0
  23. data/lib/berkeley_library/marc.rb +3 -0
  24. data/lib/berkeley_library/marc/field_info.rb +1 -0
  25. data/lib/berkeley_library/marc/field_info/ctrl_fields/data/ctrl_fields_standard.txt +2143 -0
  26. data/lib/berkeley_library/marc/field_info/leader/data/leader_standard.txt +87 -0
  27. data/lib/berkeley_library/marc/field_info/var_fields.rb +46 -0
  28. data/lib/berkeley_library/marc/field_info/var_fields/data.rb +4 -0
  29. data/lib/berkeley_library/marc/field_info/var_fields/data/mapping-orig.tsv +265 -0
  30. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_berkeley_9xx.txt +53 -0
  31. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_berkeley_9xx_parsed.rb +51 -0
  32. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_standard.txt +5458 -0
  33. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_standard_parsed.rb +6577 -0
  34. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_tind_reserved.txt +44 -0
  35. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_tind_reserved_parsed.rb +30 -0
  36. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_ucblit_tind.txt +105 -0
  37. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_ucblit_tind_parsed.rb +114 -0
  38. data/lib/berkeley_library/marc/field_info/var_fields/ind_def.rb +39 -0
  39. data/lib/berkeley_library/marc/field_info/var_fields/ind_val_def.rb +27 -0
  40. data/lib/berkeley_library/marc/field_info/var_fields/instrument_or_voices_code.rb +26 -0
  41. data/lib/berkeley_library/marc/field_info/var_fields/obsolescible.rb +55 -0
  42. data/lib/berkeley_library/marc/field_info/var_fields/section.rb +50 -0
  43. data/lib/berkeley_library/marc/field_info/var_fields/subfield_def.rb +50 -0
  44. data/lib/berkeley_library/marc/field_info/var_fields/subfield_val.rb +24 -0
  45. data/lib/berkeley_library/marc/field_info/var_fields/var_field_def.rb +62 -0
  46. data/lib/berkeley_library/marc/field_info/var_fields/var_field_list.rb +43 -0
  47. data/lib/berkeley_library/marc/field_info/var_fields/var_field_parser.rb +136 -0
  48. data/lib/berkeley_library/marc/field_info/var_fields/var_field_transform.rb +160 -0
  49. data/lib/berkeley_library/marc/module_info.rb +14 -0
  50. data/lib/marc_extensions.rb +1 -0
  51. data/lib/marc_extensions/data_field.rb +29 -0
  52. data/lib/marc_extensions/field_map.rb +63 -0
  53. data/lib/marc_extensions/record.rb +100 -0
  54. data/lib/marc_extensions/subfield.rb +21 -0
  55. data/lib/marc_extensions/xml_reader.rb +19 -0
  56. data/rakelib/bundle.rake +8 -0
  57. data/rakelib/coverage.rake +11 -0
  58. data/rakelib/gem.rake +54 -0
  59. data/rakelib/rubocop.rake +18 -0
  60. data/rakelib/spec.rake +2 -0
  61. data/spec/.rubocop.yml +37 -0
  62. data/spec/berkeley_library/marc/field_info/var_fields/var_field_def_spec.rb +26 -0
  63. data/spec/berkeley_library/marc/field_info/var_fields/var_field_parser_spec.rb +596 -0
  64. data/spec/berkeley_library/marc/field_info/var_fields/var_field_transform_spec.rb +173 -0
  65. data/spec/berkeley_library/marc/field_info/var_fields_spec.rb +112 -0
  66. data/spec/data/field_info/vf_046.txt +32 -0
  67. data/spec/data/field_info/vf_048.txt +112 -0
  68. data/spec/data/record-187888.xml +78 -0
  69. data/spec/marc_extensions/data_field_spec.rb +13 -0
  70. data/spec/marc_extensions/record_spec.rb +211 -0
  71. data/spec/spec_helper.rb +27 -0
  72. metadata +354 -0
@@ -0,0 +1,44 @@
1
+ // Adapted from https://docs.tind.io/article/vwihxqwp67-marc-fields-used-by-tind
2
+ // Retrieved 2021-01-15
3
+
4
+ // Note: this only includes fields relevant to our TIND/DA implementation.
5
+
6
+ 035 - EXTERNAL IDENTIFIER (NR)
7
+ Indicators
8
+ First - Undefined
9
+ # - Undefined
10
+ Second - Undefined
11
+ # - Undefined
12
+ Subfield Codes
13
+ $a - External identifier (NR)
14
+
15
+ // TODO: is (NR) correct for $p and $q?
16
+ 909 - OAI REPOSITORY METADATA (NR)
17
+ Indicators
18
+ First - ???
19
+ C - ??? // Yes, caps should be invalid; yes, TIND does it anyway
20
+ Second - ???
21
+ 0 - ???
22
+ Subfield Codes
23
+ $o - OAI URI for the record (oai:<hostname>:<record ID>) (NR)
24
+ $p - setSpec for current valid sets (NR)
25
+ $q - setSpec for previously valid sets (NR)
26
+
27
+ // TODO: is (R) correct here?
28
+ 980 - COLLECTION QUERY TAG (R)
29
+ Indicators
30
+ First - Undefined
31
+ # - Undefined
32
+ Second - Undefined
33
+ # - Undefined
34
+ Subfield Codes
35
+ $a - Collection identifier
36
+
37
+ 991 - RESTRICTION STATUS (NR)
38
+ Indicators
39
+ First - Undefined
40
+ # - Undefined
41
+ Second - Undefined
42
+ # - Undefined
43
+ Subfield Codes
44
+ $a - Restriction tag
@@ -0,0 +1,30 @@
1
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # Nested Array/Hash data describing the TIND-reserved MARC variable
        # fields 035 and 991 (tag, description, indicator definitions and
        # subfield codes), preceded by the provenance comments of the listing
        # the data was taken from. Only the outermost Array is frozen.
        #
        # NOTE(review): the accompanying var_fields_tind_reserved.txt listing
        # also defines fields 909 and 980, which have no entry here -- confirm
        # whether that omission is intentional.
        TIND_RESERVED_PARSED = [
          {
            doc_comments: [
              { comment: 'Adapted from https://docs.tind.io/article/vwihxqwp67-marc-fields-used-by-tind' },
              { comment: 'Retrieved 2021-01-15' }
            ]
          },
          { comment: 'Note: this only includes fields relevant to our TIND/DA implementation.' },
          {
            sections: [
              {
                var_fields: [
                  {
                    tag: '035',
                    desc: 'EXTERNAL IDENTIFIER (NR)',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'a', desc: 'External identifier (NR)', values: [] }
                    ]
                  },
                  {
                    tag: '991',
                    desc: 'RESTRICTION STATUS (NR)',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'a', desc: 'Restriction tag', values: [] }
                    ]
                  }
                ]
              }
            ]
          }
        ].freeze
      end
    end
  end
end
@@ -0,0 +1,105 @@
1
+ // Adapted from https://docs.google.com/spreadsheets/d/1QbIxsJGosgzKYBXhDhJJlTZtvCRjz75TRecn1cbDhm8/edit
2
+ // Retrieved 2021-01-15
3
+
4
+ // TODO: check Google Sheets automatically for updates
5
+
6
+ 852 - LOCATION (R)
7
+ Indicators
8
+ First - Undefined
9
+ # - Undefined
10
+ Second - Undefined
11
+ # - Undefined
12
+ Subfield Codes
13
+ $c - Location/Archive/Repository
14
+
15
+ 901 - IDENTIFIERS [INTERNAL] (NR) [REQUIRED WHEN APPLICABLE]
16
+ Indicators
17
+ First - Undefined
18
+ # - Undefined
19
+ Second - Undefined
20
+ # - Undefined
21
+ Subfield Codes
22
+ $a - Ark identifier for METS file [REQUIRED WHEN APPLICABLE]
23
+ $f - Ark identifier for finding aid
24
+ $g - PJID:DBID from GenDB [REQUIRED WHEN APPLICABLE]
25
+ $m - Millennium record number
26
+ $o - OCLC Number
27
+
28
+ 902 - RECORD CREATION DETAILS [INTERNAL] (NR) [REQUIRED]
29
+ Indicators
30
+ First - Undefined
31
+ # - Undefined
32
+ Second - Undefined
33
+ # - Undefined
34
+ Subfield Codes
35
+ $d - Date of batch file creation [REQUIRED]
36
+ $f - METS filename or other file used as source of data [REQUIRED WHEN APPLICABLE]
37
+ $n - Batch uploader's initials [REQUIRED]
38
+ $p - Description of the process and programs used
39
+
40
+ 903 - BIBLIOGRAPHIC CODES FROM MARC RECORD (NR)
41
+ Indicators
42
+ First - Undefined
43
+ # - Undefined
44
+ Second - Undefined
45
+ # - Undefined
46
+ Subfield Codes
47
+ $b - Bibliographic codes
48
+
49
+ 950 - Local/Administrative Notes (R)
50
+ Indicators
51
+ First - Undefined
52
+ # - Undefined
53
+ Second - Undefined
54
+ # - Undefined
55
+ Subfield Codes
56
+ $a - Notes
57
+ $3 - Note type
58
+
59
+ 982 - COLLECTION / PROJECT NAME (R) [REQUIRED]
60
+ Indicators
61
+ First - Undefined
62
+ # - Undefined
63
+ Second - Undefined
64
+ # - Undefined
65
+ Subfield Codes
66
+ $a - Short collection name (displays in facet) [REQUIRED]
67
+ $b - Long collection name (displays in record) [REQUIRED]
68
+ $p - Project Name
69
+
70
+ 991 - RESTRICTION STATUS (NR)
71
+ Indicators
72
+ First - Undefined
73
+ # - Undefined
74
+ Second - Undefined
75
+ # - Undefined
76
+ Subfield Codes
77
+ $a - Restricted collection identifier
78
+
79
+ 998 - AV TRACK INFO (R)
80
+ Indicators
81
+ First - Undefined
82
+ # - Undefined
83
+ Second - Undefined
84
+ # - Undefined
85
+ Subfield Codes
86
+ $a - Duration (hh:mm:ss)
87
+ $t - Title
88
+ $g - Relative path [REQUIRED]
89
+
90
+ // Note: 856 is not documented in the spreadsheet, but inferred from use
91
+
92
+ 856 - ELECTRONIC LOCATION AND ACCESS (R)
93
+ Indicators
94
+ First - Access method
95
+ 4 - HTTP
96
+ Second - Relationship
97
+ # - Content file
98
+ 1 - Library catalog record
99
+ 2 - Related resource
100
+ Subfield Codes
101
+ $s - File size (NR)
102
+ $u - Uniform Resource Identifier (NR) [REQUIRED]
103
+ $y - Link text (NR)
104
+ $z - Comment (NR)
105
+ $9 - Checksum (NR)
@@ -0,0 +1,114 @@
1
# rubocop:disable Metrics/ModuleLength
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # Nested Array/Hash data describing the UC Berkeley Library TIND
        # variable fields 852, 901, 902, 903, 950, 982, 991 and 998 (tag,
        # description, indicator definitions and subfield codes), preceded by
        # the provenance comments of the listing the data was taken from.
        # Only the outermost Array is frozen.
        #
        # NOTE(review): the accompanying var_fields_ucblit_tind.txt listing
        # also defines field 856, which has no entry here -- confirm whether
        # that omission is intentional.
        UCBLIT_TIND_PARSED = [
          {
            doc_comments: [
              { comment: 'Adapted from https://docs.google.com/spreadsheets/d/1QbIxsJGosgzKYBXhDhJJlTZtvCRjz75TRecn1cbDhm8/edit' },
              { comment: 'Retrieved 2021-01-15' }
            ]
          },
          { comment: 'TODO: check Google Sheets automatically for updates' },
          {
            sections: [
              {
                var_fields: [
                  {
                    tag: '852',
                    desc: 'LOCATION (R)',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'c', desc: 'Location/Archive/Repository', values: [] }
                    ]
                  },
                  {
                    tag: '901',
                    desc: 'IDENTIFIERS [INTERNAL] (NR) [REQUIRED WHEN APPLICABLE]',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'a', desc: 'Ark identifier for METS file [REQUIRED WHEN APPLICABLE]', values: [] },
                      { code: 'f', desc: 'Ark identifier for finding aid', values: [] },
                      { code: 'g', desc: 'PJID:DBID from GenDB [REQUIRED WHEN APPLICABLE]', values: [] },
                      { code: 'm', desc: 'Millennium record number', values: [] },
                      { code: 'o', desc: 'OCLC Number', values: [] }
                    ]
                  },
                  {
                    tag: '902',
                    desc: 'RECORD CREATION DETAILS [INTERNAL] (NR) [REQUIRED]',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'd', desc: 'Date of batch file creation [REQUIRED]', values: [] },
                      {
                        code: 'f',
                        desc: 'METS filename or other file used as source of data [REQUIRED WHEN APPLICABLE]',
                        values: []
                      },
                      { code: 'n', desc: "Batch uploader's initials [REQUIRED]", values: [] },
                      { code: 'p', desc: 'Description of the process and programs used', values: [] }
                    ]
                  },
                  {
                    tag: '903',
                    desc: 'BIBLIOGRAPHIC CODES FROM MARC RECORD (NR)',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'b', desc: 'Bibliographic codes', values: [] }
                    ]
                  },
                  {
                    tag: '950',
                    desc: 'Local/Administrative Notes (R)',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'a', desc: 'Notes', values: [] },
                      { code: '3', desc: 'Note type', values: [] }
                    ]
                  },
                  {
                    tag: '982',
                    desc: 'COLLECTION / PROJECT NAME (R) [REQUIRED]',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'a', desc: 'Short collection name (displays in facet) [REQUIRED]', values: [] },
                      { code: 'b', desc: 'Long collection name (displays in record) [REQUIRED]', values: [] },
                      { code: 'p', desc: 'Project Name', values: [] }
                    ]
                  },
                  {
                    tag: '991',
                    desc: 'RESTRICTION STATUS (NR)',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'a', desc: 'Restricted collection identifier', values: [] }
                    ]
                  },
                  {
                    tag: '998',
                    desc: 'AV TRACK INFO (R)',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'a', desc: 'Duration (hh:mm:ss)', values: [] },
                      { code: 't', desc: 'Title', values: [] },
                      { code: 'g', desc: 'Relative path [REQUIRED]', values: [] }
                    ]
                  }
                ]
              }
            ]
          }
        ].freeze
      end
    end
  end
end
# rubocop:enable Metrics/ModuleLength
@@ -0,0 +1,39 @@
1
+ require 'berkeley_library/marc/field_info/var_fields/ind_val_def'
2
+ require 'berkeley_library/marc/field_info/var_fields/obsolescible'
3
+
4
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # Definition of one indicator position (first or second): its
        # description plus the list of value definitions attached to it.
        class IndDef
          # TODO: include Comparable
          include Obsolescible

          # Human-readable name for each supported indicator position
          POS_NAMES = { 1 => 'First', 2 => 'Second' }.freeze

          attr_reader :pos, :desc, :val_defs

          # @param pos [Integer] the indicator position; must be 1 or 2
          # @param desc [String] the indicator description
          # @param val_defs [Array] the value definitions for this indicator
          #   (presumably IndValDef instances -- confirm against callers)
          # @raise [ArgumentError] if pos is neither 1 nor 2
          def initialize(pos:, desc:, val_defs:)
            unless [1, 2].include?(pos)
              raise ArgumentError, "Not a valid indicator position: #{pos.inspect}"
            end

            @pos = pos
            @desc = desc
            @val_defs = val_defs
          end

          # Builds a new IndDef with the same position and description whose
          # value definitions have been filtered through `_reject_obsolete`.
          # @return [IndDef] the filtered copy
          def reject_obsoletes
            current_val_defs = _reject_obsolete(val_defs)
            IndDef.new(pos: pos, desc: desc, val_defs: current_val_defs)
          end

          # Renders a "<position name> - <description>" header line followed
          # by one space-prefixed line per value definition.
          # @return [String] the newline-joined text
          def to_s
            header = "#{POS_NAMES[pos]} - #{desc}"
            [header, *val_defs.map { |v| " #{v}" }].join("\n")
          end
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'berkeley_library/marc/field_info/var_fields/obsolescible'
2
+
3
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # A single indicator value paired with its description.
        class IndValDef
          # TODO: include Comparable
          include Obsolescible

          attr_reader :val, :desc

          # TODO: destructure range values in some useful way (e.g. 130)
          #
          # @param val the indicator value
          # @param desc the description of that value
          def initialize(val:, desc:)
            @desc = desc
            @val = val
          end

          # @return [String] the value and description joined by " - "
          def to_s
            format('%s - %s', val, desc)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'berkeley_library/marc/field_info/var_fields/obsolescible'
2
+
3
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # A single instrument-or-voices code value paired with its description.
        class InstrumentOrVoicesCode
          # TODO: include Comparable
          include Obsolescible

          attr_reader :val, :desc

          # @param val the code value
          # @param desc the description of that code
          def initialize(val:, desc:)
            @desc = desc
            @val = val
          end

          # @return [String] the value and description joined by " - "
          def to_s
            format('%s - %s', val, desc)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # Mixin for possibly-obsolete members of a MARC field info tree.
        #
        # An including object is considered obsolete when it exposes a `desc`
        # reader whose value contains the {OBSOLETE} marker. Non-leaf nodes
        # are expected to override {#reject_obsoletes} and {#empty?}; the
        # defaults provided here are those of a leaf node.
        module Obsolescible

          # Description marker for obsolete elements in a MARC field list
          OBSOLETE = '[OBSOLETE]'.freeze

          # Return true if this Obsolescible's description marks
          # it as obsolete, false otherwise. Objects that have no
          # `desc` method, or whose `desc` is nil, are never obsolete.
          # @return [Boolean] true if obsolete, false otherwise
          def obsolete?
            return false unless respond_to?(:desc)

            # desc may be nil, in which case the safe-navigation call yields
            # nil; normalize so that callers always get a strict Boolean
            desc&.include?(OBSOLETE) ? true : false
          end

          # Return a copy of this object with all obsolete nodes
          # in its subtree removed, or the object itself if it has
          # no descendants. Note that in the (pathological) situation
          # of a non-obsolete, non-leaf node with no non-obsolete
          # descendants, an empty object will be returned.
          # @return [Obsolescible] this object (default for leaf nodes)
          def reject_obsoletes
            # default implementation for leaf nodes
            self
          end

          # Return true if this object's subtree is empty, false if
          # this object is a leaf node or has a non-empty subtree.
          # @return [Boolean] always false in this default implementation
          def empty?
            # default implementation for leaf nodes
            false
          end

          protected

          # Utility method for deep-rejecting obsolete members and their
          # obsolete descendants from a list of Obsolescibles.
          # @param obs [Array<Obsolescible>] a list of Obsolescibles.
          # @return [Array<Obsolescible>] a filtered list with no obsolete nodes,
          #   and no non-leaf nodes with only obsolete descendants
          def _reject_obsolete(obs)
            # lazy, so that each element passes through all three steps
            # without building intermediate arrays
            obs.lazy
              .reject(&:obsolete?)
              .map(&:reject_obsoletes)
              .reject(&:empty?)
              .to_a
          end
        end
      end
    end
  end
end