berkeley_library-marc 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/build.yml +18 -0
  3. data/.gitignore +388 -0
  4. data/.idea/inspectionProfiles/Project_Default.xml +20 -0
  5. data/.idea/marc.iml +101 -0
  6. data/.idea/misc.xml +4 -0
  7. data/.idea/modules.xml +8 -0
  8. data/.idea/vcs.xml +6 -0
  9. data/.rubocop.yml +334 -0
  10. data/.ruby-version +1 -0
  11. data/.simplecov +8 -0
  12. data/.yardopts +1 -0
  13. data/CHANGES.md +12 -0
  14. data/Dockerfile +57 -0
  15. data/Gemfile +3 -0
  16. data/Jenkinsfile +18 -0
  17. data/LICENSE.md +21 -0
  18. data/README.md +4 -0
  19. data/Rakefile +20 -0
  20. data/berkeley_library-marc.gemspec +42 -0
  21. data/docker-compose.yml +15 -0
  22. data/lib/.rubocop.yml +6 -0
  23. data/lib/berkeley_library/marc.rb +3 -0
  24. data/lib/berkeley_library/marc/field_info.rb +1 -0
  25. data/lib/berkeley_library/marc/field_info/ctrl_fields/data/ctrl_fields_standard.txt +2143 -0
  26. data/lib/berkeley_library/marc/field_info/leader/data/leader_standard.txt +87 -0
  27. data/lib/berkeley_library/marc/field_info/var_fields.rb +46 -0
  28. data/lib/berkeley_library/marc/field_info/var_fields/data.rb +4 -0
  29. data/lib/berkeley_library/marc/field_info/var_fields/data/mapping-orig.tsv +265 -0
  30. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_berkeley_9xx.txt +53 -0
  31. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_berkeley_9xx_parsed.rb +51 -0
  32. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_standard.txt +5458 -0
  33. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_standard_parsed.rb +6577 -0
  34. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_tind_reserved.txt +44 -0
  35. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_tind_reserved_parsed.rb +30 -0
  36. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_ucblit_tind.txt +105 -0
  37. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_ucblit_tind_parsed.rb +114 -0
  38. data/lib/berkeley_library/marc/field_info/var_fields/ind_def.rb +39 -0
  39. data/lib/berkeley_library/marc/field_info/var_fields/ind_val_def.rb +27 -0
  40. data/lib/berkeley_library/marc/field_info/var_fields/instrument_or_voices_code.rb +26 -0
  41. data/lib/berkeley_library/marc/field_info/var_fields/obsolescible.rb +55 -0
  42. data/lib/berkeley_library/marc/field_info/var_fields/section.rb +50 -0
  43. data/lib/berkeley_library/marc/field_info/var_fields/subfield_def.rb +50 -0
  44. data/lib/berkeley_library/marc/field_info/var_fields/subfield_val.rb +24 -0
  45. data/lib/berkeley_library/marc/field_info/var_fields/var_field_def.rb +62 -0
  46. data/lib/berkeley_library/marc/field_info/var_fields/var_field_list.rb +43 -0
  47. data/lib/berkeley_library/marc/field_info/var_fields/var_field_parser.rb +136 -0
  48. data/lib/berkeley_library/marc/field_info/var_fields/var_field_transform.rb +160 -0
  49. data/lib/berkeley_library/marc/module_info.rb +14 -0
  50. data/lib/marc_extensions.rb +1 -0
  51. data/lib/marc_extensions/data_field.rb +29 -0
  52. data/lib/marc_extensions/field_map.rb +63 -0
  53. data/lib/marc_extensions/record.rb +100 -0
  54. data/lib/marc_extensions/subfield.rb +21 -0
  55. data/lib/marc_extensions/xml_reader.rb +19 -0
  56. data/rakelib/bundle.rake +8 -0
  57. data/rakelib/coverage.rake +11 -0
  58. data/rakelib/gem.rake +54 -0
  59. data/rakelib/rubocop.rake +18 -0
  60. data/rakelib/spec.rake +2 -0
  61. data/spec/.rubocop.yml +37 -0
  62. data/spec/berkeley_library/marc/field_info/var_fields/var_field_def_spec.rb +26 -0
  63. data/spec/berkeley_library/marc/field_info/var_fields/var_field_parser_spec.rb +596 -0
  64. data/spec/berkeley_library/marc/field_info/var_fields/var_field_transform_spec.rb +173 -0
  65. data/spec/berkeley_library/marc/field_info/var_fields_spec.rb +112 -0
  66. data/spec/data/field_info/vf_046.txt +32 -0
  67. data/spec/data/field_info/vf_048.txt +112 -0
  68. data/spec/data/record-187888.xml +78 -0
  69. data/spec/marc_extensions/data_field_spec.rb +13 -0
  70. data/spec/marc_extensions/record_spec.rb +211 -0
  71. data/spec/spec_helper.rb +27 -0
  72. metadata +354 -0
@@ -0,0 +1,50 @@
1
+ require 'berkeley_library/marc/field_info/var_fields/var_field_def'
2
+ require 'berkeley_library/marc/field_info/var_fields/obsolescible'
3
+
4
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # A described group of variable-field definitions.
        #
        # Enumerating a section walks the entries of {#var_fields}; all
        # `Enumerable` methods are therefore available on the section itself.
        class Section
          include Enumerable
          include Obsolescible

          # desc:       the section description, printed between `--` markers by #to_s
          # var_fields: the collection of field definitions held by this section
          attr_reader :desc, :var_fields

          # @param desc the section description
          # @param var_fields the field definitions belonging to this section
          def initialize(desc:, var_fields:)
            @desc = desc
            @var_fields = var_fields
          end

          # Yields each field definition in turn.
          #
          # @return an enumerator over the field definitions when called
          #   without a block; otherwise whatever `var_fields.each` returns
          def each(&block)
            if block_given?
              var_fields.each(&block)
            else
              to_enum(:each)
            end
          end

          # @return the number of field definitions, as reported by `var_fields`
          def size
            var_fields.size
          end

          # @return whether `var_fields` reports itself as empty
          def empty?
            var_fields.empty?
          end

          # Builds a new section with the same description whose field
          # definitions have been passed through `_reject_obsolete` (supplied
          # by the Obsolescible mixin, which is defined outside this file).
          #
          # @return [Section] the filtered copy; the receiver is not modified here
          def reject_obsoletes
            filtered = _reject_obsolete(var_fields)
            Section.new(desc: desc, var_fields: filtered)
          end

          # Renders the `--desc--` header line followed by the string form of
          # every field definition, one block per line group.
          #
          # @return [String]
          def to_s
            header = "--#{desc}--"
            [header, *var_fields.map(&:to_s)].join("\n")
          end
        end
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ require 'berkeley_library/marc/field_info/var_fields/subfield_val'
2
+ require 'berkeley_library/marc/field_info/var_fields/obsolescible'
3
+
4
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # Definition of one subfield (or range of subfields) of a variable
        # field: its code, a description, and any enumerated values.
        #
        # TODO: destructure range codes in some useful way (e.g. 886a-z, 0-9)
        class SubfieldDef
          include Obsolescible

          # Prefix placed in front of each value line by #to_s.
          INDENT = ' '.freeze
          private_constant :INDENT

          # code:   a single subfield code, or a Range of codes
          # desc:   the subfield description
          # values: the enumerated values defined for the subfield (may be empty)
          attr_reader :code, :desc, :values

          # @param code a single subfield code, or a Range when the definition
          #   covers a span of codes
          # @param desc the subfield description
          # @param values the enumerated values for this subfield
          def initialize(code:, desc:, values: [])
            @code = code
            @desc = desc
            @values = values
          end

          # Builds a new definition with the same code and description whose
          # values have been passed through `_reject_obsolete` (supplied by
          # the Obsolescible mixin, which is defined outside this file).
          #
          # @return [SubfieldDef]
          def reject_obsoletes
            kept = _reject_obsolete(values)
            SubfieldDef.new(code: code, desc: desc, values: kept)
          end

          # Yields each enumerated value.
          #
          # @return an enumerator when called without a block; otherwise
          #   whatever `values.each` returns
          def each_value(&block)
            if block_given?
              values.each(&block)
            else
              to_enum(:each_value)
            end
          end

          # Renders `$<code> - <desc>` followed by one indented line per value.
          #
          # @return [String]
          def to_s
            rendered = ["$#{code_str} - #{desc}"]
            values.each do |value|
              rendered << (INDENT + value.to_s)
            end
            rendered.join("\n")
          end

          private

          # String form of the code: `first-last` for a Range, otherwise the
          # code's own string form.
          def code_str
            case code
            when Range then "#{code.first}-#{code.last}"
            else code.to_s
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
# SubfieldVal mixes in Obsolescible, so load it here rather than relying on
# some other file having been required first (subfield_def.rb, for example,
# requires this file *before* it requires obsolescible).
require 'berkeley_library/marc/field_info/var_fields/obsolescible'

module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # One enumerated value of a subfield, paired with its description.
        class SubfieldVal
          # TODO: include Comparable
          include Obsolescible

          # The value itself, as it appears before the ` - ` separator in #to_s.
          attr_reader :val
          # The description of the value.
          attr_reader :desc

          # @param val the subfield value
          # @param desc the description of that value
          def initialize(val:, desc:)
            @val = val
            @desc = desc
          end

          # @return [String] the value and its description, joined by ` - `
          def to_s
            "#{val} - #{desc}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'berkeley_library/marc/field_info/var_fields/ind_def'
2
+ require 'berkeley_library/marc/field_info/var_fields/subfield_def'
3
+ require 'berkeley_library/marc/field_info/var_fields/instrument_or_voices_code'
4
+ require 'berkeley_library/marc/field_info/var_fields/obsolescible'
5
+
6
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # Definition of a single variable field: its tag and description plus
        # the indicator, subfield, and instrument-or-voices definitions that
        # belong to it.
        class VarFieldDef
          include Obsolescible

          # One level of indentation used by #to_s.
          INDENT = ' '.freeze
          private_constant :INDENT

          attr_reader :tag, :desc, :indicators, :subfield_codes, :inst_or_voices_codes

          # @param tag the field tag
          # @param desc the field description
          # @param indicators the indicator definitions (defaults to none)
          # @param subfield_codes the subfield definitions (defaults to none)
          # @param inst_or_voices_codes the instrument-or-voices code
          #   definitions (defaults to none)
          def initialize(tag:, desc:, indicators: [], subfield_codes: [], inst_or_voices_codes: [])
            @tag = tag
            @desc = desc
            # TODO: split out ind1/ind2 (but what about obsolete?)
            @indicators = indicators
            @subfield_codes = subfield_codes
            @inst_or_voices_codes = inst_or_voices_codes
          end

          # Renders `<tag> - <desc>` followed by one indented, titled group
          # for each non-empty collection, in the order: indicators, subfield
          # codes, instrument or voices codes.
          #
          # @return [String]
          def to_s
            rendered = ["#{tag} - #{desc}"]

            groups = {
              'Indicators' => indicators,
              'Subfield Codes' => subfield_codes,
              'Instrument or Voices Codes' => inst_or_voices_codes
            }
            groups.each { |title, members| concat_indented(rendered, members, title) }

            rendered.join("\n")
          end

          # Builds a new definition with the same tag and description in which
          # each collection has been passed through `_reject_obsolete`
          # (supplied by the Obsolescible mixin, defined outside this file).
          #
          # @return [VarFieldDef]
          def reject_obsoletes
            VarFieldDef.new(
              tag: tag,
              desc: desc,
              indicators: _reject_obsolete(indicators),
              subfield_codes: _reject_obsolete(subfield_codes),
              inst_or_voices_codes: _reject_obsolete(inst_or_voices_codes)
            )
          end

          private

          # Appends `header` (indented once) and every line of every value's
          # string form (indented twice) to `blocks`. Does nothing when
          # `values` is empty.
          def concat_indented(blocks, values, header)
            return if values.empty?

            nested = INDENT * 2
            blocks << (INDENT + header)
            values.each do |value|
              value.to_s.each_line(chomp: true) { |line| blocks << (nested + line) }
            end
          end

        end
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require 'berkeley_library/marc/field_info/var_fields/section'
2
+ require 'berkeley_library/marc/field_info/var_fields/obsolescible'
3
+
4
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # A described list of sections. Enumerating the list flattens it:
        # each element yielded is an entry of one of the sections, not a
        # section itself.
        class VarFieldList
          include Enumerable
          include Obsolescible

          # desc:     the description of the whole list
          # sections: the sections making up the list
          attr_reader :desc, :sections

          # @param desc the description of the list
          # @param sections the sections making up the list
          def initialize(desc:, sections:)
            @desc = desc
            @sections = sections
          end

          # Yields every entry of every section, section by section.
          #
          # @return an enumerator when called without a block; otherwise
          #   whatever `sections.each` returns
          def each(&block)
            if block_given?
              sections.each do |section|
                section.each(&block)
              end
            else
              to_enum(:each)
            end
          end

          # @return the total of the sizes reported by all sections
          def size
            sections.sum { |section| section.size }
          end

          # Builds a new list with the same description whose sections have
          # been passed through `_reject_obsolete` (supplied by the
          # Obsolescible mixin, which is defined outside this file).
          #
          # @return [VarFieldList]
          def reject_obsoletes
            kept = _reject_obsolete(sections)
            VarFieldList.new(desc: desc, sections: kept)
          end

          # @return [String] the string forms of all sections, newline-separated
          #   (the list's own description is not included)
          def to_s
            rendered = sections.map { |section| section.to_s }
            rendered.join("\n")
          end
        end
      end
    end
  end
end
@@ -0,0 +1,136 @@
1
+ require 'parslet'
2
+
3
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields

        # Parslet grammar for the plain-text variable-field listings.
        #
        # The root rule is `:list`: leading `//` comment lines, then one or
        # more sections, each holding one or more variable-field definitions
        # with optional indicator, subfield-code and instrument-or-voices
        # blocks. Captured names (`:tag`, `:desc`, `:val`, ...) form the
        # parse tree.
        #
        # rubocop:disable Style/BlockDelimiters
        class VarFieldParser < Parslet::Parser

          # ------------------------------------------------------------
          # structural

          # A single horizontal-whitespace character.
          rule(:blank) { match('[[:blank:]]') }

          # Tab or any printable character other than a hyphen.
          rule(:nonhyphen) { match('[\t[:print:]&&[^-]]') }

          # A hyphen that is not followed by (an optional second hyphen and)
          # the end of the line -- presumably so that free text cannot swallow
          # the closing `--` of a section header.
          rule(:nonterminal_hyphen) { match('-(?!-?$)') }

          rule(:printable) { nonhyphen | nonterminal_hyphen }

          # Any line terminator, with CRLF tried first so it is taken as one break.
          rule(:ln_br) { str("\r\n") | match('[\n\v\f\r\u0085\u2028\u2029]') }

          # End of input (not referenced by the other rules in this class).
          rule(:eof) { any.absent? }

          # Optional trailing blanks followed by a line break.
          rule(:eol) { blank.repeat >> ln_br }

          # A (possibly empty) run of printable characters.
          rule(:text) { printable.repeat }

          # `// ...`, optionally indented; the text after the marker is captured.
          rule(:comment) { blank.repeat >> str('//') >> blank.repeat >> text.as(:comment) }

          # non-captured
          rule(:nc) { (eol | comment).repeat }

          # At least one line end or comment, then at least one blank: the
          # start of an indented continuation line.
          rule(:indent) { (eol | comment).repeat(1) >> blank.repeat(1) }

          # ------------------------------------------------------------
          # indicators

          rule(:ind_header) { str('Indicators') }

          rule(:ind1_header) { str('First - ') >> text.as(:desc) }

          rule(:ind2_header) { str('Second - ') >> text.as(:desc) }

          rule(:ind_val) {
            # Yes, caps should be invalid; yes, TIND does it anyway
            match('[0-9a-zA-Z#]')
          }

          # Relatively rare -- e.g. 130
          rule(:ind_val_range) { match('[0-9]') >> str('-') >> match('[0-9]') }

          # `<value or digit range> - <description>`
          rule(:ind_def) { (ind_val_range | ind_val).as(:val) >> str(' - ') >> text.as(:desc) }

          rule(:ind1) { ind1_header >> (indent >> ind_def).repeat.as(:ind1) }

          rule(:ind2) { ind2_header >> (indent >> ind_def).repeat.as(:ind2) }

          rule(:indicators) do
            # TODO: enforce that we only get multiples when all but one are obsolete
            ind_header >> (indent >> (ind1 | ind2)).repeat
          end

          # ------------------------------------------------------------
          # subfield definitions

          rule(:sf_header) { str('Subfield Codes') }

          rule(:sf_code) { match('[0-9a-z]') }

          # rare (e.g. 034a)
          rule(:subfield_value) { match('[^$ \t]').repeat(1).as(:val) >> str(' - ') >> text.as(:desc) }

          rule(:subfield_def) {
            # range - rarely used (e.g. 886a-z, 0-9)
            range_code = sf_code.as(:code1) >> str('-') >> sf_code.as(:code2)
            # single code - typical case
            single_code = sf_code.as(:code)
            # 880 doesn't have -, probably a typo
            separator = str(' - ') | str(' ')
            value_lines = (indent >> subfield_value).repeat.as(:values)

            str('$') >> (range_code | single_code) >> separator >> text.as(:desc) >> value_lines
          }

          # The `Subfield Codes` header is optional; at least one definition is required.
          rule(:subfield_codes) { sf_header.maybe >> (indent >> subfield_def).repeat(1) }

          # instrument or voices codes (only for 048)

          rule(:ivc_header) { str('Instrument or Voices Codes') }

          # Exactly two lowercase letters, ` - `, then the description.
          rule(:ivc_value) { match('[a-z]').repeat(2, 2).as(:val) >> str(' - ') >> text.as(:desc) }

          rule(:ivc_def) { ivc_header >> (indent >> ivc_value).repeat(1) }

          # ------------------------------------------------------------
          # variable fields

          # Exactly three digits, ` - `, then the description.
          rule(:vf_header) { match('[0-9]').repeat(3, 3).as(:tag) >> str(' - ') >> text.as(:desc) }

          rule(:vf) {
            ivc_block = (indent >> ivc_def).as(:inst_or_voices_codes).maybe
            details = indent >> indicators.as(:indicators) >>
                      indent >> subfield_codes.as(:subfield_codes) >>
                      ivc_block

            # A field may consist of its header line alone.
            vf_header >> details.maybe
          }

          # ------------------------------------------------------------
          # sections

          rule(:section_header) { str('--') >> text.as(:desc) >> str('--') }

          rule(:section) { section_header.maybe >> (nc >> vf).repeat(1).as(:var_fields) >> nc }

          # ------------------------------------------------------------
          # entire list

          rule(:list) { (comment >> eol).repeat.as(:doc_comments) >> nc >> section.repeat(1).as(:sections) }

          root(:list)

          # ------------------------------------------------------------
          # Parser

          # Parses `io` with the grammar above, defaulting the error reporter
          # to `Parslet::ErrorReporter::Deepest`. Entries in `options`
          # override that default.
          #
          # @param io the input handed on to `Parslet::Parser#parse`
          # @param options [Hash, nil] extra options for `Parslet::Parser#parse`
          # @return the parse tree produced by Parslet
          def parse(io, options = nil)
            defaults = { reporter: Parslet::ErrorReporter::Deepest.new }
            merged = options ? defaults.merge(options) : defaults

            super(io, merged)
          end
        end
        # rubocop:enable Style/BlockDelimiters
      end
    end
  end
end
@@ -0,0 +1,160 @@
1
+ require 'parslet'
2
+ require 'berkeley_library/marc/field_info/var_fields/var_field_list'
3
+
4
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        # Parslet transform that turns a parse tree keyed by `:tag`, `:desc`,
        # `:val`, `:ind1`, `:ind2`, `:code`, `:code1`/`:code2`, `:values`,
        # `:var_fields`, `:doc_comments` and `:sections` into VarFieldList /
        # Section / VarFieldDef / IndDef / SubfieldDef objects.
        #
        # TODO: flag [OBSOLETE], (R), (NR)
        # rubocop:disable Style/BlockDelimiters
        class VarFieldTransform < Parslet::Transform

          # Intermediate holder for a `{val:, desc:}` pair. The same tree shape
          # is used for indicator values, subfield values and
          # instrument-or-voices codes, so the enclosing rule decides which
          # concrete class to convert it to.
          class AnyValue
            attr_reader :val, :desc

            # @param val the raw value from the parse tree
            # @param desc the raw description from the parse tree
            def initialize(val:, desc:)
              @val = val
              @desc = desc
            end

            # @return [IndValDef] this pair as an indicator value definition
            def to_ind_val_def
              IndValDef.new(val: val, desc: desc)
            end

            # @return [SubfieldVal] this pair as a subfield value
            def to_subfield_val
              SubfieldVal.new(val: val, desc: desc)
            end

            # @return [InstrumentOrVoicesCode] this pair as an instrument-or-voices code
            def to_ivc
              InstrumentOrVoicesCode.new(val: val, desc: desc)
            end
          end

          # ------------------------------------------------------------
          # structural elements

          # comment: unwrap `{comment: text}` to the bare text
          rule(comment: simple(:comment)) {
            comment
          }

          # value
          rule(val: simple(:val), desc: simple(:desc)) {
            AnyValue.new(val: val, desc: desc)
          }

          # ------------------------------------------------------------
          # indicators

          # ind1
          rule(desc: simple(:desc), ind1: sequence(:val_defs)) {
            IndDef.new(pos: 1, desc: desc, val_defs: val_defs.map(&:to_ind_val_def))
          }

          # ind2
          rule(desc: simple(:desc), ind2: sequence(:val_defs)) {
            IndDef.new(pos: 2, desc: desc, val_defs: val_defs.map(&:to_ind_val_def))
          }

          # ------------------------------------------------------------
          # subfield definitions

          # sf_code_def (single code)
          rule(code: simple(:code), desc: simple(:desc), values: sequence(:values)) {
            SubfieldDef.new(code: code, desc: desc, values: values.map(&:to_subfield_val))
          }

          # sf_code_def (range code, e.g. 886a-z, 0-9)
          rule(code1: simple(:code1), code2: simple(:code2), desc: simple(:desc), values: sequence(:values)) {
            SubfieldDef.new(code: (code1.to_s..code2.to_s), desc: desc, values: values.map(&:to_subfield_val))
          }

          # ------------------------------------------------------------
          # variable fields

          # field with indicators, subfield codes and instrument-or-voices codes
          rule(
            tag: simple(:tag),
            desc: simple(:desc),
            indicators: sequence(:indicators),
            subfield_codes: sequence(:subfield_codes),
            inst_or_voices_codes: sequence(:inst_or_voices_codes)
          ) {
            VarFieldDef.new(
              tag: tag,
              desc: desc,
              indicators: indicators,
              subfield_codes: subfield_codes,
              # instrument or voices codes (only for 048)
              inst_or_voices_codes: inst_or_voices_codes.map(&:to_ivc)
            )
          }

          # field with indicators and subfield codes
          rule(
            tag: simple(:tag),
            desc: simple(:desc),
            indicators: sequence(:indicators),
            subfield_codes: sequence(:subfield_codes)
          ) {
            VarFieldDef.new(
              tag: tag,
              desc: desc,
              indicators: indicators,
              subfield_codes: subfield_codes
            )
          }

          # TODO: reinstate these?
          # rule(
          #   tag: simple(:tag),
          #   desc: simple(:desc),
          #   indicators: sequence(:indicators)
          # ) {
          #   VarFieldDef.new(
          #     tag: tag,
          #     desc: desc,
          #     indicators: indicators
          #   )
          # }

          # rule(
          #   tag: simple(:tag),
          #   desc: simple(:desc),
          #   subfield_codes: sequence(:subfield_codes)
          # ) {
          #   VarFieldDef.new(
          #     tag: tag,
          #     desc: desc,
          #     subfield_codes: subfield_codes
          #   )
          # }

          # field consisting of its header line only
          rule(
            tag: simple(:tag),
            desc: simple(:desc)
          ) {
            VarFieldDef.new(
              tag: tag,
              desc: desc
            )
          }

          # ------------------------------------------------------------
          # Sections

          rule(desc: simple(:desc), var_fields: sequence(:var_fields)) {
            Section.new(desc: desc, var_fields: var_fields)
          }

          # ------------------------------------------------------------
          # List

          rule(doc_comments: sequence(:doc_comments), sections: sequence(:sections)) {
            # Comment values arrive as Parslet::Slice objects, which emulate
            # only part of the String API; convert to real Strings before
            # calling String-only methods such as #start_with?.
            comment_lines = doc_comments.map(&:to_s)
            desc = comment_lines.reject { |c| c.start_with?('TODO:') }.join("\n") # TODO: is "\n" best?
            VarFieldList.new(desc: desc, sections: sections)
          }
        end
        # rubocop:enable Style/BlockDelimiters
      end
    end
  end
end