berkeley_library-marc 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/build.yml +18 -0
  3. data/.gitignore +388 -0
  4. data/.idea/inspectionProfiles/Project_Default.xml +20 -0
  5. data/.idea/marc.iml +101 -0
  6. data/.idea/misc.xml +4 -0
  7. data/.idea/modules.xml +8 -0
  8. data/.idea/vcs.xml +6 -0
  9. data/.rubocop.yml +334 -0
  10. data/.ruby-version +1 -0
  11. data/.simplecov +8 -0
  12. data/.yardopts +1 -0
  13. data/CHANGES.md +12 -0
  14. data/Dockerfile +57 -0
  15. data/Gemfile +3 -0
  16. data/Jenkinsfile +18 -0
  17. data/LICENSE.md +21 -0
  18. data/README.md +4 -0
  19. data/Rakefile +20 -0
  20. data/berkeley_library-marc.gemspec +42 -0
  21. data/docker-compose.yml +15 -0
  22. data/lib/.rubocop.yml +6 -0
  23. data/lib/berkeley_library/marc.rb +3 -0
  24. data/lib/berkeley_library/marc/field_info.rb +1 -0
  25. data/lib/berkeley_library/marc/field_info/ctrl_fields/data/ctrl_fields_standard.txt +2143 -0
  26. data/lib/berkeley_library/marc/field_info/leader/data/leader_standard.txt +87 -0
  27. data/lib/berkeley_library/marc/field_info/var_fields.rb +46 -0
  28. data/lib/berkeley_library/marc/field_info/var_fields/data.rb +4 -0
  29. data/lib/berkeley_library/marc/field_info/var_fields/data/mapping-orig.tsv +265 -0
  30. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_berkeley_9xx.txt +53 -0
  31. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_berkeley_9xx_parsed.rb +51 -0
  32. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_standard.txt +5458 -0
  33. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_standard_parsed.rb +6577 -0
  34. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_tind_reserved.txt +44 -0
  35. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_tind_reserved_parsed.rb +30 -0
  36. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_ucblit_tind.txt +105 -0
  37. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_ucblit_tind_parsed.rb +114 -0
  38. data/lib/berkeley_library/marc/field_info/var_fields/ind_def.rb +39 -0
  39. data/lib/berkeley_library/marc/field_info/var_fields/ind_val_def.rb +27 -0
  40. data/lib/berkeley_library/marc/field_info/var_fields/instrument_or_voices_code.rb +26 -0
  41. data/lib/berkeley_library/marc/field_info/var_fields/obsolescible.rb +55 -0
  42. data/lib/berkeley_library/marc/field_info/var_fields/section.rb +50 -0
  43. data/lib/berkeley_library/marc/field_info/var_fields/subfield_def.rb +50 -0
  44. data/lib/berkeley_library/marc/field_info/var_fields/subfield_val.rb +24 -0
  45. data/lib/berkeley_library/marc/field_info/var_fields/var_field_def.rb +62 -0
  46. data/lib/berkeley_library/marc/field_info/var_fields/var_field_list.rb +43 -0
  47. data/lib/berkeley_library/marc/field_info/var_fields/var_field_parser.rb +136 -0
  48. data/lib/berkeley_library/marc/field_info/var_fields/var_field_transform.rb +160 -0
  49. data/lib/berkeley_library/marc/module_info.rb +14 -0
  50. data/lib/marc_extensions.rb +1 -0
  51. data/lib/marc_extensions/data_field.rb +29 -0
  52. data/lib/marc_extensions/field_map.rb +63 -0
  53. data/lib/marc_extensions/record.rb +100 -0
  54. data/lib/marc_extensions/subfield.rb +21 -0
  55. data/lib/marc_extensions/xml_reader.rb +19 -0
  56. data/rakelib/bundle.rake +8 -0
  57. data/rakelib/coverage.rake +11 -0
  58. data/rakelib/gem.rake +54 -0
  59. data/rakelib/rubocop.rake +18 -0
  60. data/rakelib/spec.rake +2 -0
  61. data/spec/.rubocop.yml +37 -0
  62. data/spec/berkeley_library/marc/field_info/var_fields/var_field_def_spec.rb +26 -0
  63. data/spec/berkeley_library/marc/field_info/var_fields/var_field_parser_spec.rb +596 -0
  64. data/spec/berkeley_library/marc/field_info/var_fields/var_field_transform_spec.rb +173 -0
  65. data/spec/berkeley_library/marc/field_info/var_fields_spec.rb +112 -0
  66. data/spec/data/field_info/vf_046.txt +32 -0
  67. data/spec/data/field_info/vf_048.txt +112 -0
  68. data/spec/data/record-187888.xml +78 -0
  69. data/spec/marc_extensions/data_field_spec.rb +13 -0
  70. data/spec/marc_extensions/record_spec.rb +211 -0
  71. data/spec/spec_helper.rb +27 -0
  72. metadata +354 -0
@@ -0,0 +1,44 @@
1
+ // Adapted from https://docs.tind.io/article/vwihxqwp67-marc-fields-used-by-tind
2
+ // Retrieved 2021-01-15
3
+
4
+ // Note: this only includes fields relevant to our TIND/DA implementation.
5
+
6
+ 035 - EXTERNAL IDENTIFIER (NR)
7
+ Indicators
8
+ First - Undefined
9
+ # - Undefined
10
+ Second - Undefined
11
+ # - Undefined
12
+ Subfield Codes
13
+ $a - External identifier (NR)
14
+
15
+ // TODO: is (NR) correct for $p and $q?
16
+ 909 - OAI REPOSITORY METADATA (NR)
17
+ Indicators
18
+ First - ???
19
+ C - ??? // Yes, caps should be invalid; yes, TIND does it anyway
20
+ Second - ???
21
+ 0 - ???
22
+ Subfield Codes
23
+ $o - OAI URI for the record (oai:<hostname>:<record ID>) (NR)
24
+ $p - setSpec for current valid sets (NR)
25
+ $q - setSpec for previously valid sets (NR)
26
+
27
+ // TODO: is (R) correct here?
28
+ 980 - COLLECTION QUERY TAG (R)
29
+ Indicators
30
+ First - Undefined
31
+ # - Undefined
32
+ Second - Undefined
33
+ # - Undefined
34
+ Subfield Codes
35
+ $a - Collection identifier
36
+
37
+ 991 - RESTRICTION STATUS (NR)
38
+ Indicators
39
+ First - Undefined
40
+ # - Undefined
41
+ Second - Undefined
42
+ # - Undefined
43
+ Subfield Codes
44
+ $a - Restriction tag
@@ -0,0 +1,30 @@
1
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # Variable-field definitions reserved by TIND, as a frozen array of
        # plain hashes: leading documentation comments, a free-standing
        # comment, and one section whose :var_fields entries each carry a
        # :tag, a :desc, two :indicators and a list of :subfield_codes.
        #
        # Only the outer array is frozen; nested hashes, arrays and strings
        # are not.
        #
        # NOTE(review): only tags 035 and 991 are present here, while the
        # companion text listing (presumably var_fields_tind_reserved.txt)
        # also defines 909 and 980 -- confirm whether this file needs to be
        # regenerated.
        TIND_RESERVED_PARSED = [
          {
            doc_comments: [
              { comment: 'Adapted from https://docs.tind.io/article/vwihxqwp67-marc-fields-used-by-tind' },
              { comment: 'Retrieved 2021-01-15' }
            ]
          },
          { comment: 'Note: this only includes fields relevant to our TIND/DA implementation.' },
          {
            sections: [
              {
                var_fields: [
                  {
                    tag: '035',
                    desc: 'EXTERNAL IDENTIFIER (NR)',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'a', desc: 'External identifier (NR)', values: [] }
                    ]
                  },
                  {
                    tag: '991',
                    desc: 'RESTRICTION STATUS (NR)',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'a', desc: 'Restriction tag', values: [] }
                    ]
                  }
                ]
              }
            ]
          }
        ].freeze
      end
    end
  end
end
@@ -0,0 +1,105 @@
1
+ // Adapted from https://docs.google.com/spreadsheets/d/1QbIxsJGosgzKYBXhDhJJlTZtvCRjz75TRecn1cbDhm8/edit
2
+ // Retrieved 2021-01-15
3
+
4
+ // TODO: check Google Sheets automatically for updates
5
+
6
+ 852 - LOCATION (R)
7
+ Indicators
8
+ First - Undefined
9
+ # - Undefined
10
+ Second - Undefined
11
+ # - Undefined
12
+ Subfield Codes
13
+ $c - Location/Archive/Repository
14
+
15
+ 901 - IDENTIFIERS [INTERNAL] (NR) [REQUIRED WHEN APPLICABLE]
16
+ Indicators
17
+ First - Undefined
18
+ # - Undefined
19
+ Second - Undefined
20
+ # - Undefined
21
+ Subfield Codes
22
+ $a - Ark identifier for METS file [REQUIRED WHEN APPLICABLE]
23
+ $f - Ark identifier for finding aid
24
+ $g - PJID:DBID from GenDB [REQUIRED WHEN APPLICABLE]
25
+ $m - Millennium record number
26
+ $o - OCLC Number
27
+
28
+ 902 - RECORD CREATION DETAILS [INTERNAL] (NR) [REQUIRED]
29
+ Indicators
30
+ First - Undefined
31
+ # - Undefined
32
+ Second - Undefined
33
+ # - Undefined
34
+ Subfield Codes
35
+ $d - Date of batch file creation [REQUIRED]
36
+ $f - METS filename or other file used as source of data [REQUIRED WHEN APPLICABLE]
37
+ $n - Batch uploader's initials [REQUIRED]
38
+ $p - Description of the process and programs used
39
+
40
+ 903 - BIBLIOGRAPHIC CODES FROM MARC RECORD (NR)
41
+ Indicators
42
+ First - Undefined
43
+ # - Undefined
44
+ Second - Undefined
45
+ # - Undefined
46
+ Subfield Codes
47
+ $b - Bibliographic codes
48
+
49
+ 950 - Local/Administrative Notes (R)
50
+ Indicators
51
+ First - Undefined
52
+ # - Undefined
53
+ Second - Undefined
54
+ # - Undefined
55
+ Subfield Codes
56
+ $a - Notes
57
+ $3 - Note type
58
+
59
+ 982 - COLLECTION / PROJECT NAME (R) [REQUIRED]
60
+ Indicators
61
+ First - Undefined
62
+ # - Undefined
63
+ Second - Undefined
64
+ # - Undefined
65
+ Subfield Codes
66
+ $a - Short collection name (displays in facet) [REQUIRED]
67
+ $b - Long collection name (displays in record) [REQUIRED]
68
+ $p - Project Name
69
+
70
+ 991 - RESTRICTION STATUS (NR)
71
+ Indicators
72
+ First - Undefined
73
+ # - Undefined
74
+ Second - Undefined
75
+ # - Undefined
76
+ Subfield Codes
77
+ $a - Restricted collection identifier
78
+
79
+ 998 - AV TRACK INFO (R)
80
+ Indicators
81
+ First - Undefined
82
+ # - Undefined
83
+ Second - Undefined
84
+ # - Undefined
85
+ Subfield Codes
86
+ $a - Duration (hh:mm:ss)
87
+ $t - Title
88
+ $g - Relative path [REQUIRED]
89
+
90
+ // Note: 856 is not documented in the spreadsheet, but inferred from use
91
+
92
+ 856 - ELECTRONIC LOCATION AND ACCESS (R)
93
+ Indicators
94
+ First - Access method
95
+ 4 - HTTP
96
+ Second - Relationship
97
+ # - Content file
98
+ 1 - Library catalog record
99
+ 2 - Related resource
100
+ Subfield Codes
101
+ $s - File size (NR)
102
+ $u - Uniform Resource Identifier (NR) [REQUIRED]
103
+ $y - Link text (NR)
104
+ $z - Comment (NR)
105
+ $9 - Checksum (NR)
@@ -0,0 +1,114 @@
1
# rubocop:disable Metrics/ModuleLength
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # Variable-field definitions used locally with TIND, as a frozen
        # array of plain hashes: leading documentation comments, a
        # free-standing comment, and one section whose :var_fields entries
        # each carry a :tag, a :desc, two :indicators and a list of
        # :subfield_codes.
        #
        # Only the outer array is frozen; nested hashes, arrays and strings
        # are not.
        #
        # NOTE(review): the companion text listing (presumably
        # var_fields_ucblit_tind.txt) also defines 856, which is absent
        # here -- confirm whether this file needs to be regenerated.
        UCBLIT_TIND_PARSED = [
          {
            doc_comments: [
              { comment: 'Adapted from https://docs.google.com/spreadsheets/d/1QbIxsJGosgzKYBXhDhJJlTZtvCRjz75TRecn1cbDhm8/edit' },
              { comment: 'Retrieved 2021-01-15' }
            ]
          },
          { comment: 'TODO: check Google Sheets automatically for updates' },
          {
            sections: [
              {
                var_fields: [
                  {
                    tag: '852',
                    desc: 'LOCATION (R)',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'c', desc: 'Location/Archive/Repository', values: [] }
                    ]
                  },
                  {
                    tag: '901',
                    desc: 'IDENTIFIERS [INTERNAL] (NR) [REQUIRED WHEN APPLICABLE]',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'a', desc: 'Ark identifier for METS file [REQUIRED WHEN APPLICABLE]', values: [] },
                      { code: 'f', desc: 'Ark identifier for finding aid', values: [] },
                      { code: 'g', desc: 'PJID:DBID from GenDB [REQUIRED WHEN APPLICABLE]', values: [] },
                      { code: 'm', desc: 'Millennium record number', values: [] },
                      { code: 'o', desc: 'OCLC Number', values: [] }
                    ]
                  },
                  {
                    tag: '902',
                    desc: 'RECORD CREATION DETAILS [INTERNAL] (NR) [REQUIRED]',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'd', desc: 'Date of batch file creation [REQUIRED]', values: [] },
                      {
                        code: 'f',
                        desc: 'METS filename or other file used as source of data [REQUIRED WHEN APPLICABLE]',
                        values: []
                      },
                      { code: 'n', desc: "Batch uploader's initials [REQUIRED]", values: [] },
                      { code: 'p', desc: 'Description of the process and programs used', values: [] }
                    ]
                  },
                  {
                    tag: '903',
                    desc: 'BIBLIOGRAPHIC CODES FROM MARC RECORD (NR)',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'b', desc: 'Bibliographic codes', values: [] }
                    ]
                  },
                  {
                    tag: '950',
                    desc: 'Local/Administrative Notes (R)',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'a', desc: 'Notes', values: [] },
                      { code: '3', desc: 'Note type', values: [] }
                    ]
                  },
                  {
                    tag: '982',
                    desc: 'COLLECTION / PROJECT NAME (R) [REQUIRED]',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'a', desc: 'Short collection name (displays in facet) [REQUIRED]', values: [] },
                      { code: 'b', desc: 'Long collection name (displays in record) [REQUIRED]', values: [] },
                      { code: 'p', desc: 'Project Name', values: [] }
                    ]
                  },
                  {
                    tag: '991',
                    desc: 'RESTRICTION STATUS (NR)',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'a', desc: 'Restricted collection identifier', values: [] }
                    ]
                  },
                  {
                    tag: '998',
                    desc: 'AV TRACK INFO (R)',
                    indicators: [
                      { desc: 'Undefined', ind1: [{ val: '#', desc: 'Undefined' }] },
                      { desc: 'Undefined', ind2: [{ val: '#', desc: 'Undefined' }] }
                    ],
                    subfield_codes: [
                      { code: 'a', desc: 'Duration (hh:mm:ss)', values: [] },
                      { code: 't', desc: 'Title', values: [] },
                      { code: 'g', desc: 'Relative path [REQUIRED]', values: [] }
                    ]
                  }
                ]
              }
            ]
          }
        ].freeze
      end
    end
  end
end
# rubocop:enable Metrics/ModuleLength
@@ -0,0 +1,39 @@
1
+ require 'berkeley_library/marc/field_info/var_fields/ind_val_def'
2
+ require 'berkeley_library/marc/field_info/var_fields/obsolescible'
3
+
4
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # Definition of a single indicator position of a variable field:
        # which position it is, how it is described, and the value
        # definitions listed for it.
        class IndDef
          # TODO: include Comparable
          include Obsolescible

          # Display names of the indicator positions, keyed by position number
          POS_NAMES = { 1 => 'First', 2 => 'Second' }.freeze

          attr_reader :pos, :desc, :val_defs

          # @param pos [Integer] the indicator position; must be 1 or 2
          # @param desc [String] the description of the indicator
          # @param val_defs [Array] the value definitions for this indicator
          #   (presumably IndValDef instances -- confirm against callers)
          # @raise [ArgumentError] if pos is neither 1 nor 2
          def initialize(pos:, desc:, val_defs:)
            unless [1, 2].include?(pos)
              raise ArgumentError, "Not a valid indicator position: #{pos.inspect}"
            end

            @pos = pos
            @desc = desc
            @val_defs = val_defs
          end

          # Builds a new IndDef with the same position and description,
          # whose value definitions have been filtered through
          # `_reject_obsolete`.
          # @return [IndDef] the filtered copy
          def reject_obsoletes
            remaining = _reject_obsolete(val_defs)
            IndDef.new(pos: pos, desc: desc, val_defs: remaining)
          end

          # Renders a "<position name> - <description>" heading followed by
          # one line per value definition, each prefixed with a space.
          # @return [String] the newline-joined text
          def to_s
            heading = "#{POS_NAMES[pos]} - #{desc}"
            [heading, *val_defs.map { |v| " #{v}" }].join("\n")
          end
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ require 'berkeley_library/marc/field_info/var_fields/obsolescible'
2
+
3
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # Definition of one value an indicator may take, paired with its
        # description.
        class IndValDef
          # TODO: include Comparable
          include Obsolescible

          attr_reader :val, :desc

          # TODO: destructure range values in some useful way (e.g. 130)
          #
          # @param val the indicator value
          # @param desc the description of that value
          def initialize(val:, desc:)
            @val = val
            @desc = desc
          end

          # @return [String] the value and description as "<val> - <desc>"
          def to_s
            format('%<val>s - %<desc>s', val: val, desc: desc)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'berkeley_library/marc/field_info/var_fields/obsolescible'
2
+
3
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # A code value paired with its description.
        class InstrumentOrVoicesCode
          # TODO: include Comparable
          include Obsolescible

          attr_reader :val, :desc

          # @param val the code value
          # @param desc the description of that code
          def initialize(val:, desc:)
            @val = val
            @desc = desc
          end

          # @return [String] the code and description as "<val> - <desc>"
          def to_s
            format('%<val>s - %<desc>s', val: val, desc: desc)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # Mixin for possibly-obsolete members of a MARC field info tree
        module Obsolescible

          # Description marker for obsolete elements in a MARC field list
          OBSOLETE = '[OBSOLETE]'.freeze

          # Return true if this Obsolescible's description marks
          # it as obsolete, false otherwise. Objects that have no
          # `desc` method, or whose `desc` is nil, are never obsolete.
          # @return [Boolean] true if obsolete, false otherwise
          def obsolete?
            return false unless respond_to?(:desc)

            # Read `desc` once, and answer with a strict boolean
            # rather than nil when there is no description at all.
            description = desc
            return false if description.nil?

            description.include?(OBSOLETE)
          end

          # Return a copy of this object with all obsolete nodes
          # in its subtree removed, or the object itself if it has
          # no descendants. Note that in the (pathological) situation
          # of a non-obsolete, non-leaf node with no non-obsolete
          # descendants, an empty object will be returned.
          # @return [Obsolescible] this default implementation returns self
          def reject_obsoletes
            # default implementation for leaf nodes
            self
          end

          # Return true if this object's subtree is empty, false if
          # this object is a leaf node or has a non-empty subtree.
          # @return [Boolean] this default implementation returns false
          def empty?
            # default implementation for leaf nodes
            false
          end

          protected

          # Utility method for deep-rejecting obsolete members and their
          # obsolete descendants from a list of Obsolescibles.
          # @param obs [Array<Obsolescible>] a list of Obsolescibles.
          # @return [Array<Obsolescible>] a filtered list with no obsolete nodes,
          #   and no non-leaf nodes with only obsolete descendants
          def _reject_obsolete(obs)
            # Three stages per element: drop obsolete nodes, prune each
            # survivor's own subtree, then drop whatever ended up empty.
            obs.lazy
              .reject(&:obsolete?)
              .map(&:reject_obsoletes)
              .reject(&:empty?)
              .to_a
          end
        end
      end
    end
  end
end