berkeley_library-marc 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/build.yml +18 -0
  3. data/.gitignore +388 -0
  4. data/.idea/inspectionProfiles/Project_Default.xml +20 -0
  5. data/.idea/marc.iml +101 -0
  6. data/.idea/misc.xml +4 -0
  7. data/.idea/modules.xml +8 -0
  8. data/.idea/vcs.xml +6 -0
  9. data/.rubocop.yml +334 -0
  10. data/.ruby-version +1 -0
  11. data/.simplecov +8 -0
  12. data/.yardopts +1 -0
  13. data/CHANGES.md +12 -0
  14. data/Dockerfile +57 -0
  15. data/Gemfile +3 -0
  16. data/Jenkinsfile +18 -0
  17. data/LICENSE.md +21 -0
  18. data/README.md +4 -0
  19. data/Rakefile +20 -0
  20. data/berkeley_library-marc.gemspec +42 -0
  21. data/docker-compose.yml +15 -0
  22. data/lib/.rubocop.yml +6 -0
  23. data/lib/berkeley_library/marc.rb +3 -0
  24. data/lib/berkeley_library/marc/field_info.rb +1 -0
  25. data/lib/berkeley_library/marc/field_info/ctrl_fields/data/ctrl_fields_standard.txt +2143 -0
  26. data/lib/berkeley_library/marc/field_info/leader/data/leader_standard.txt +87 -0
  27. data/lib/berkeley_library/marc/field_info/var_fields.rb +46 -0
  28. data/lib/berkeley_library/marc/field_info/var_fields/data.rb +4 -0
  29. data/lib/berkeley_library/marc/field_info/var_fields/data/mapping-orig.tsv +265 -0
  30. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_berkeley_9xx.txt +53 -0
  31. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_berkeley_9xx_parsed.rb +51 -0
  32. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_standard.txt +5458 -0
  33. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_standard_parsed.rb +6577 -0
  34. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_tind_reserved.txt +44 -0
  35. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_tind_reserved_parsed.rb +30 -0
  36. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_ucblit_tind.txt +105 -0
  37. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_ucblit_tind_parsed.rb +114 -0
  38. data/lib/berkeley_library/marc/field_info/var_fields/ind_def.rb +39 -0
  39. data/lib/berkeley_library/marc/field_info/var_fields/ind_val_def.rb +27 -0
  40. data/lib/berkeley_library/marc/field_info/var_fields/instrument_or_voices_code.rb +26 -0
  41. data/lib/berkeley_library/marc/field_info/var_fields/obsolescible.rb +55 -0
  42. data/lib/berkeley_library/marc/field_info/var_fields/section.rb +50 -0
  43. data/lib/berkeley_library/marc/field_info/var_fields/subfield_def.rb +50 -0
  44. data/lib/berkeley_library/marc/field_info/var_fields/subfield_val.rb +24 -0
  45. data/lib/berkeley_library/marc/field_info/var_fields/var_field_def.rb +62 -0
  46. data/lib/berkeley_library/marc/field_info/var_fields/var_field_list.rb +43 -0
  47. data/lib/berkeley_library/marc/field_info/var_fields/var_field_parser.rb +136 -0
  48. data/lib/berkeley_library/marc/field_info/var_fields/var_field_transform.rb +160 -0
  49. data/lib/berkeley_library/marc/module_info.rb +14 -0
  50. data/lib/marc_extensions.rb +1 -0
  51. data/lib/marc_extensions/data_field.rb +29 -0
  52. data/lib/marc_extensions/field_map.rb +63 -0
  53. data/lib/marc_extensions/record.rb +100 -0
  54. data/lib/marc_extensions/subfield.rb +21 -0
  55. data/lib/marc_extensions/xml_reader.rb +19 -0
  56. data/rakelib/bundle.rake +8 -0
  57. data/rakelib/coverage.rake +11 -0
  58. data/rakelib/gem.rake +54 -0
  59. data/rakelib/rubocop.rake +18 -0
  60. data/rakelib/spec.rake +2 -0
  61. data/spec/.rubocop.yml +37 -0
  62. data/spec/berkeley_library/marc/field_info/var_fields/var_field_def_spec.rb +26 -0
  63. data/spec/berkeley_library/marc/field_info/var_fields/var_field_parser_spec.rb +596 -0
  64. data/spec/berkeley_library/marc/field_info/var_fields/var_field_transform_spec.rb +173 -0
  65. data/spec/berkeley_library/marc/field_info/var_fields_spec.rb +112 -0
  66. data/spec/data/field_info/vf_046.txt +32 -0
  67. data/spec/data/field_info/vf_048.txt +112 -0
  68. data/spec/data/record-187888.xml +78 -0
  69. data/spec/marc_extensions/data_field_spec.rb +13 -0
  70. data/spec/marc_extensions/record_spec.rb +211 -0
  71. data/spec/spec_helper.rb +27 -0
  72. metadata +354 -0
@@ -0,0 +1,50 @@
1
+ require 'berkeley_library/marc/field_info/var_fields/var_field_def'
2
+ require 'berkeley_library/marc/field_info/var_fields/obsolescible'
3
+
4
+ module BerkeleyLibrary
5
+ module Marc
6
+ module FieldInfo
7
+ module VarFields
8
# A described group of variable-field definitions.
#
# Enumerates the entries of +var_fields+ and reports +size+ / +empty?+
# straight from that collection.
class Section
  include Enumerable
  include Obsolescible

  # desc:       the section description given to the constructor
  # var_fields: the collection of field definitions in this section
  attr_reader :desc, :var_fields

  # @param desc the section description
  # @param var_fields the field definitions belonging to this section
  def initialize(desc:, var_fields:)
    @desc = desc
    @var_fields = var_fields
  end

  # Yields each entry of +var_fields+ in order.
  #
  # @return an Enumerator over this section when no block is given,
  #   otherwise whatever +var_fields.each+ returns
  def each(&block)
    return var_fields.each(&block) if block

    to_enum(:each)
  end

  # @return the number of entries in +var_fields+
  def size
    var_fields.size
  end

  # @return whether +var_fields+ has no entries
  def empty?
    var_fields.empty?
  end

  # Builds a new Section with the same description whose fields have been
  # passed through +_reject_obsolete+ (a helper not defined in this class;
  # presumably supplied by Obsolescible -- confirm there).
  #
  # @return [Section] the filtered copy; the receiver is left untouched
  def reject_obsoletes
    remaining = _reject_obsolete(var_fields)
    Section.new(desc: desc, var_fields: remaining)
  end

  # Renders a "--desc--" header line followed by the string form of each
  # field, one block per line group, joined with newlines.
  def to_s
    ["--#{desc}--", *var_fields.map(&:to_s)].join("\n")
  end
end
47
+ end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,50 @@
1
+ require 'berkeley_library/marc/field_info/var_fields/subfield_val'
2
+ require 'berkeley_library/marc/field_info/var_fields/obsolescible'
3
+
4
+ module BerkeleyLibrary
5
+ module Marc
6
+ module FieldInfo
7
+ module VarFields
8
# Definition of one subfield: its code, a description, and any enumerated
# values.
#
# +code+ may be a single code or a Range of codes; #to_s renders a Range as
# "first-last".
#
# TODO: destructure range codes in some useful way (e.g. 886a-z, 0-9)
class SubfieldDef
  include Obsolescible

  # Prefix placed in front of each value line by #to_s.
  INDENT = ' '.freeze
  private_constant :INDENT

  attr_reader :code, :desc, :values

  # @param code the subfield code, or a Range of codes
  # @param desc the subfield description
  # @param values the enumerated values for this subfield (defaults to none)
  def initialize(code:, desc:, values: [])
    @code = code
    @desc = desc
    @values = values
  end

  # Builds a new SubfieldDef with the same code and description whose values
  # have been passed through +_reject_obsolete+ (a helper not defined in this
  # class; presumably supplied by Obsolescible -- confirm there).
  #
  # @return [SubfieldDef] the filtered copy
  def reject_obsoletes
    kept = _reject_obsolete(values)
    SubfieldDef.new(code: code, desc: desc, values: kept)
  end

  # Yields each entry of +values+, or returns an Enumerator when called
  # without a block.
  def each_value(&block)
    return values.each(&block) if block

    to_enum(:each_value)
  end

  # Renders "$code - desc" followed by one indented line per value.
  def to_s
    header = "$#{code_str} - #{desc}"
    value_lines = values.map { |v| INDENT + v.to_s }
    [header, *value_lines].join("\n")
  end

  private

  # String form of +code+: "first-last" for a Range, +to_s+ otherwise.
  def code_str
    code.is_a?(Range) ? "#{code.first}-#{code.last}" : code.to_s
  end
end
47
+ end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,24 @@
1
# Obsolescible is mixed in below, so load it explicitly rather than relying
# on some other file having been required first.
require 'berkeley_library/marc/field_info/var_fields/obsolescible'

module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # One enumerated value of a subfield, paired with its description.
        class SubfieldVal
          # TODO: include Comparable
          include Obsolescible

          # The value itself, as passed to the constructor.
          attr_reader :val
          # The description of the value, as passed to the constructor.
          attr_reader :desc

          # @param val the subfield value
          # @param desc the description of that value
          def initialize(val:, desc:)
            @val = val
            @desc = desc
          end

          # Renders the value as "val - desc".
          def to_s
            "#{val} - #{desc}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'berkeley_library/marc/field_info/var_fields/ind_def'
2
+ require 'berkeley_library/marc/field_info/var_fields/subfield_def'
3
+ require 'berkeley_library/marc/field_info/var_fields/instrument_or_voices_code'
4
+ require 'berkeley_library/marc/field_info/var_fields/obsolescible'
5
+
6
+ module BerkeleyLibrary
7
+ module Marc
8
+ module FieldInfo
9
+ module VarFields
10
# Definition of one variable field: its tag and description plus the
# indicator, subfield, and instrument-or-voices definitions attached to it.
class VarFieldDef
  include Obsolescible

  # One level of indentation used by #to_s.
  INDENT = ' '.freeze
  private_constant :INDENT

  attr_reader :tag, :desc, :indicators, :subfield_codes, :inst_or_voices_codes

  # @param tag the field tag
  # @param desc the field description
  # @param indicators indicator definitions (defaults to none)
  # @param subfield_codes subfield definitions (defaults to none)
  # @param inst_or_voices_codes instrument or voices codes (defaults to none)
  def initialize(tag:, desc:, indicators: [], subfield_codes: [], inst_or_voices_codes: [])
    @tag = tag
    @desc = desc
    @indicators = indicators # TODO: split out ind1/ind2 (but what about obsolete?)
    @subfield_codes = subfield_codes
    @inst_or_voices_codes = inst_or_voices_codes
  end

  # Renders "tag - desc" followed by an indented, titled group for each
  # non-empty collection, in the order indicators, subfield codes,
  # instrument or voices codes.
  def to_s
    groups = [
      [indicators, 'Indicators'],
      [subfield_codes, 'Subfield Codes'],
      [inst_or_voices_codes, 'Instrument or Voices Codes']
    ]

    blocks = ["#{tag} - #{desc}"]
    groups.each { |values, header| concat_indented(blocks, values, header) }
    blocks.join("\n")
  end

  # Builds a new VarFieldDef with the same tag and description whose three
  # collections have each been passed through +_reject_obsolete+ (a helper
  # not defined in this class; presumably supplied by Obsolescible --
  # confirm there).
  #
  # @return [VarFieldDef] the filtered copy
  def reject_obsoletes
    VarFieldDef.new(
      tag: tag,
      desc: desc,
      indicators: _reject_obsolete(indicators),
      subfield_codes: _reject_obsolete(subfield_codes),
      inst_or_voices_codes: _reject_obsolete(inst_or_voices_codes)
    )
  end

  private

  # Appends +header+ (indented once) and every line of every value's string
  # form (indented twice) to +blocks+. Does nothing when +values+ is empty.
  def concat_indented(blocks, values, header)
    return if values.empty?

    blocks << INDENT + header
    values.each do |v|
      v.to_s.each_line(chomp: true) { |line| blocks << INDENT + INDENT + line }
    end
  end
end
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,43 @@
1
+ require 'berkeley_library/marc/field_info/var_fields/section'
2
+ require 'berkeley_library/marc/field_info/var_fields/obsolescible'
3
+
4
+ module BerkeleyLibrary
5
+ module Marc
6
+ module FieldInfo
7
+ module VarFields
8
# A described list of sections.
#
# Enumeration walks the sections in order and yields the entries of each
# one, so the list behaves as a flat sequence of whatever the sections
# contain.
class VarFieldList
  include Enumerable
  include Obsolescible

  attr_reader :desc, :sections

  # @param desc the list description
  # @param sections the sections making up this list
  def initialize(desc:, sections:)
    @desc = desc
    @sections = sections
  end

  # Yields every entry of every section, section by section, or returns an
  # Enumerator when called without a block.
  def each(&block)
    return to_enum(:each) unless block

    sections.each { |sec| sec.each(&block) }
  end

  # @return the total of +size+ over all sections
  def size
    sections.sum { |sec| sec.size }
  end

  # Builds a new VarFieldList with the same description whose sections have
  # been passed through +_reject_obsolete+ (a helper not defined in this
  # class; presumably supplied by Obsolescible -- confirm there).
  #
  # @return [VarFieldList] the filtered copy
  def reject_obsoletes
    kept = _reject_obsolete(sections)
    VarFieldList.new(desc: desc, sections: kept)
  end

  # Renders each section's string form, joined with newlines.
  def to_s
    rendered = sections.map(&:to_s)
    rendered.join("\n")
  end
end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,136 @@
1
+ require 'parslet'
2
+
3
+ module BerkeleyLibrary
4
+ module Marc
5
+ module FieldInfo
6
+ module VarFields
7
+
8
+ # rubocop:disable Style/BlockDelimiters
9
# Parslet grammar for variable-field definition lists.
#
# The root rule is +list+: leading "//" comment lines (captured as
# +doc_comments+) followed by one or more sections, each holding one or
# more variable-field definitions.
class VarFieldParser < Parslet::Parser

  # ------------------------------------------------------------
  # structural

  # a single blank (POSIX [:blank:]) character
  rule(:blank) { match('[[:blank:]]') }

  # a tab or printable character other than a hyphen
  rule(:nonhyphen) { match('[\t[:print:]&&[^-]]') }

  # a hyphen, unless it (optionally followed by one more hyphen) ends the line
  rule(:nonterminal_hyphen) { match('-(?!-?$)') }

  rule(:printable) { nonhyphen | nonterminal_hyphen }

  # a line break: CRLF, or any single line-breaking character
  rule(:ln_br) { str("\r\n") | match('[\n\v\f\r\u0085\u2028\u2029]') }

  rule(:eof) { any.absent? }

  # optional trailing blanks, then a line break
  rule(:eol) { blank.repeat(0) >> ln_br }

  # zero or more printable characters
  rule(:text) { printable.repeat(0) }

  # a "//" comment; the text after the slashes is captured as :comment
  rule(:comment) do
    blank.repeat(0) >> str('//') >> blank.repeat(0) >> text.as(:comment)
  end

  # non-captured
  rule(:nc) { (eol | comment).repeat(0) }

  # at least one line end or comment, then at least one blank
  rule(:indent) { (eol | comment).repeat(1) >> blank.repeat(1) }

  # ------------------------------------------------------------
  # indicators

  rule(:ind_header) { str('Indicators') }

  rule(:ind1_header) { str('First - ') >> text.as(:desc) }

  rule(:ind2_header) { str('Second - ') >> text.as(:desc) }

  # Yes, caps should be invalid; yes, TIND does it anyway
  rule(:ind_val) { match('[0-9a-zA-Z#]') }

  # Relatively rare -- e.g. 130
  rule(:ind_val_range) { match('[0-9]') >> str('-') >> match('[0-9]') }

  rule(:ind_def) do
    (ind_val_range | ind_val).as(:val) >> str(' - ') >> text.as(:desc)
  end

  rule(:ind1) { ind1_header >> (indent >> ind_def).repeat(0).as(:ind1) }

  rule(:ind2) { ind2_header >> (indent >> ind_def).repeat(0).as(:ind2) }

  rule(:indicators) do
    # TODO: enforce that we only get multiples when all but one are obsolete
    ind_header >> (indent >> (ind1 | ind2)).repeat(0)
  end

  # ------------------------------------------------------------
  # subfield definitions

  rule(:sf_header) { str('Subfield Codes') }

  rule(:sf_code) { match('[0-9a-z]') }

  # rare (e.g. 034a)
  rule(:subfield_value) do
    match('[^$ \t]').repeat(1).as(:val) >> str(' - ') >> text.as(:desc)
  end

  rule(:subfield_def) do
    # range - rarely used (e.g. 886a-z, 0-9)
    code_range = sf_code.as(:code1) >> str('-') >> sf_code.as(:code2)
    # single code - typical case
    single_code = sf_code.as(:code)
    # 880 doesn't have -, probably a typo
    separator = str(' - ') | str(' ')
    value_list = (indent >> subfield_value).repeat(0).as(:values)

    str('$') >> (code_range | single_code) >> separator >> text.as(:desc) >> value_list
  end

  rule(:subfield_codes) { sf_header.maybe >> (indent >> subfield_def).repeat(1) }

  # instrument or voices codes (only for 048)

  rule(:ivc_header) { str('Instrument or Voices Codes') }

  rule(:ivc_value) do
    match('[a-z]').repeat(2, 2).as(:val) >> str(' - ') >> text.as(:desc)
  end

  rule(:ivc_def) { ivc_header >> (indent >> ivc_value).repeat(1) }

  # ------------------------------------------------------------
  # variable fields

  rule(:vf_header) do
    match('[0-9]').repeat(3, 3).as(:tag) >> str(' - ') >> text.as(:desc)
  end

  rule(:vf) do
    # everything after the header line is optional as a unit
    details =
      indent >> indicators.as(:indicators) >>
      indent >> subfield_codes.as(:subfield_codes) >>
      (indent >> ivc_def).as(:inst_or_voices_codes).maybe

    vf_header >> details.maybe
  end

  # ------------------------------------------------------------
  # sections

  rule(:section_header) { str('--') >> text.as(:desc) >> str('--') }

  rule(:section) do
    section_header.maybe >> (nc >> vf).repeat(1).as(:var_fields) >> nc
  end

  # ------------------------------------------------------------
  # entire list

  rule(:list) do
    (comment >> eol).repeat(0).as(:doc_comments) >> nc >> section.repeat(1).as(:sections)
  end

  root(:list)

  # ------------------------------------------------------------
  # Parser

  # Parses +io+ with a Parslet::ErrorReporter::Deepest reporter by default;
  # any entries in +options+ override the defaults.
  #
  # @param io the input handed to Parslet::Parser#parse
  # @param options [Hash, nil] extra parse options, merged over the defaults
  def parse(io, options = nil)
    defaults = { reporter: Parslet::ErrorReporter::Deepest.new }
    super(io, defaults.merge(options || {}))
  end
end
132
+ # rubocop:enable Style/BlockDelimiters
133
+ end
134
+ end
135
+ end
136
+ end
@@ -0,0 +1,160 @@
1
+ require 'parslet'
2
+ require 'berkeley_library/marc/field_info/var_fields/var_field_list'
3
+
4
+ module BerkeleyLibrary
5
+ module Marc
6
+ module FieldInfo
7
+ module VarFields
8
+ # TODO: flag [OBSOLETE], (R), (NR)
9
+ # rubocop:disable Style/BlockDelimiters
10
# Parslet transform that turns the tree produced by the variable-field
# grammar into IndDef, SubfieldDef, VarFieldDef, Section and VarFieldList
# objects.
#
# Rules are declared in the same order as before; keep that order when
# editing, since overlapping patterns (e.g. the tag/desc rules) are
# distinguished only by which keys are present.
class VarFieldTransform < Parslet::Transform

  # Intermediate holder for a val/desc pair. The grammar uses the same
  # val/desc shape for indicator values, subfield values and instrument or
  # voices codes, so the pair is kept generic here and converted by the
  # enclosing rule.
  class AnyValue
    attr_reader :val, :desc

    # @param val the matched value
    # @param desc the matched description
    def initialize(val:, desc:)
      @val = val
      @desc = desc
    end

    # @return [IndValDef] this pair as an indicator value definition
    def to_ind_val_def
      IndValDef.new(val: val, desc: desc)
    end

    # @return [SubfieldVal] this pair as a subfield value
    def to_subfield_val
      SubfieldVal.new(val: val, desc: desc)
    end

    # @return [InstrumentOrVoicesCode] this pair as an instrument or voices code
    def to_ivc
      InstrumentOrVoicesCode.new(val: val, desc: desc)
    end
  end

  # ------------------------------------------------------------
  # structural elements

  # comment: unwrap to the bare comment text
  rule(comment: simple(:comment)) {
    comment
  }

  # value
  rule(val: simple(:val), desc: simple(:desc)) {
    AnyValue.new(val: val, desc: desc)
  }

  # ------------------------------------------------------------
  # indicators

  # ind1
  rule(desc: simple(:desc), ind1: sequence(:val_defs)) {
    IndDef.new(pos: 1, desc: desc, val_defs: val_defs.map(&:to_ind_val_def))
  }

  # ind2
  rule(desc: simple(:desc), ind2: sequence(:val_defs)) {
    IndDef.new(pos: 2, desc: desc, val_defs: val_defs.map(&:to_ind_val_def))
  }

  # ------------------------------------------------------------
  # subfield definitions

  # sf_code_def (single code)
  rule(code: simple(:code), desc: simple(:desc), values: sequence(:values)) {
    SubfieldDef.new(code: code, desc: desc, values: values.map(&:to_subfield_val))
  }

  # sf_code_def (range code, e.g. 886a-z, 0-9)
  rule(code1: simple(:code1), code2: simple(:code2), desc: simple(:desc), values: sequence(:values)) {
    SubfieldDef.new(code: (code1.to_s..code2.to_s), desc: desc, values: values.map(&:to_subfield_val))
  }

  # ------------------------------------------------------------
  # variable fields

  rule(
    tag: simple(:tag),
    desc: simple(:desc),
    indicators: sequence(:indicators),
    subfield_codes: sequence(:subfield_codes),
    inst_or_voices_codes: sequence(:inst_or_voices_codes)
  ) {
    VarFieldDef.new(
      tag: tag,
      desc: desc,
      indicators: indicators,
      subfield_codes: subfield_codes,
      # instrument or voices codes (only for 048)
      inst_or_voices_codes: inst_or_voices_codes.map(&:to_ivc)
    )
  }

  rule(
    tag: simple(:tag),
    desc: simple(:desc),
    indicators: sequence(:indicators),
    subfield_codes: sequence(:subfield_codes)
  ) {
    VarFieldDef.new(
      tag: tag,
      desc: desc,
      indicators: indicators,
      subfield_codes: subfield_codes
    )
  }

  # TODO: reinstate these?
  # rule(
  #   tag: simple(:tag),
  #   desc: simple(:desc),
  #   indicators: sequence(:indicators)
  # ) {
  #   VarFieldDef.new(
  #     tag: tag,
  #     desc: desc,
  #     indicators: indicators
  #   )
  # }

  # rule(
  #   tag: simple(:tag),
  #   desc: simple(:desc),
  #   subfield_codes: sequence(:subfield_codes)
  # ) {
  #   VarFieldDef.new(
  #     tag: tag,
  #     desc: desc,
  #     subfield_codes: subfield_codes
  #   )
  # }

  # header-only field (no indicators or subfield codes)
  rule(
    tag: simple(:tag),
    desc: simple(:desc)
  ) {
    VarFieldDef.new(
      tag: tag,
      desc: desc
    )
  }

  # ------------------------------------------------------------
  # Sections

  rule(desc: simple(:desc), var_fields: sequence(:var_fields)) {
    Section.new(desc: desc, var_fields: var_fields)
  }

  # ------------------------------------------------------------
  # List

  # The list description is the leading doc comments, minus any that start
  # with "TODO:", joined with newlines.
  rule(doc_comments: sequence(:doc_comments), sections: sequence(:sections)) {
    # Comment text may arrive as a Parslet slice rather than a String;
    # coerce with to_s so the String-only start_with? is safe either way.
    kept = doc_comments.reject { |c| c.to_s.start_with?('TODO:') }
    desc = kept.join("\n") # TODO: is "\n" best?
    VarFieldList.new(desc: desc, sections: sections)
  }
end
156
+ # rubocop:enable Style/BlockDelimiters
157
+ end
158
+ end
159
+ end
160
+ end