berkeley_library-marc 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/build.yml +18 -0
  3. data/.gitignore +388 -0
  4. data/.idea/inspectionProfiles/Project_Default.xml +20 -0
  5. data/.idea/marc.iml +101 -0
  6. data/.idea/misc.xml +4 -0
  7. data/.idea/modules.xml +8 -0
  8. data/.idea/vcs.xml +6 -0
  9. data/.rubocop.yml +334 -0
  10. data/.ruby-version +1 -0
  11. data/.simplecov +8 -0
  12. data/.yardopts +1 -0
  13. data/CHANGES.md +12 -0
  14. data/Dockerfile +57 -0
  15. data/Gemfile +3 -0
  16. data/Jenkinsfile +18 -0
  17. data/LICENSE.md +21 -0
  18. data/README.md +4 -0
  19. data/Rakefile +20 -0
  20. data/berkeley_library-marc.gemspec +42 -0
  21. data/docker-compose.yml +15 -0
  22. data/lib/.rubocop.yml +6 -0
  23. data/lib/berkeley_library/marc.rb +3 -0
  24. data/lib/berkeley_library/marc/field_info.rb +1 -0
  25. data/lib/berkeley_library/marc/field_info/ctrl_fields/data/ctrl_fields_standard.txt +2143 -0
  26. data/lib/berkeley_library/marc/field_info/leader/data/leader_standard.txt +87 -0
  27. data/lib/berkeley_library/marc/field_info/var_fields.rb +46 -0
  28. data/lib/berkeley_library/marc/field_info/var_fields/data.rb +4 -0
  29. data/lib/berkeley_library/marc/field_info/var_fields/data/mapping-orig.tsv +265 -0
  30. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_berkeley_9xx.txt +53 -0
  31. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_berkeley_9xx_parsed.rb +51 -0
  32. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_standard.txt +5458 -0
  33. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_standard_parsed.rb +6577 -0
  34. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_tind_reserved.txt +44 -0
  35. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_tind_reserved_parsed.rb +30 -0
  36. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_ucblit_tind.txt +105 -0
  37. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_ucblit_tind_parsed.rb +114 -0
  38. data/lib/berkeley_library/marc/field_info/var_fields/ind_def.rb +39 -0
  39. data/lib/berkeley_library/marc/field_info/var_fields/ind_val_def.rb +27 -0
  40. data/lib/berkeley_library/marc/field_info/var_fields/instrument_or_voices_code.rb +26 -0
  41. data/lib/berkeley_library/marc/field_info/var_fields/obsolescible.rb +55 -0
  42. data/lib/berkeley_library/marc/field_info/var_fields/section.rb +50 -0
  43. data/lib/berkeley_library/marc/field_info/var_fields/subfield_def.rb +50 -0
  44. data/lib/berkeley_library/marc/field_info/var_fields/subfield_val.rb +24 -0
  45. data/lib/berkeley_library/marc/field_info/var_fields/var_field_def.rb +62 -0
  46. data/lib/berkeley_library/marc/field_info/var_fields/var_field_list.rb +43 -0
  47. data/lib/berkeley_library/marc/field_info/var_fields/var_field_parser.rb +136 -0
  48. data/lib/berkeley_library/marc/field_info/var_fields/var_field_transform.rb +160 -0
  49. data/lib/berkeley_library/marc/module_info.rb +14 -0
  50. data/lib/marc_extensions.rb +1 -0
  51. data/lib/marc_extensions/data_field.rb +29 -0
  52. data/lib/marc_extensions/field_map.rb +63 -0
  53. data/lib/marc_extensions/record.rb +100 -0
  54. data/lib/marc_extensions/subfield.rb +21 -0
  55. data/lib/marc_extensions/xml_reader.rb +19 -0
  56. data/rakelib/bundle.rake +8 -0
  57. data/rakelib/coverage.rake +11 -0
  58. data/rakelib/gem.rake +54 -0
  59. data/rakelib/rubocop.rake +18 -0
  60. data/rakelib/spec.rake +2 -0
  61. data/spec/.rubocop.yml +37 -0
  62. data/spec/berkeley_library/marc/field_info/var_fields/var_field_def_spec.rb +26 -0
  63. data/spec/berkeley_library/marc/field_info/var_fields/var_field_parser_spec.rb +596 -0
  64. data/spec/berkeley_library/marc/field_info/var_fields/var_field_transform_spec.rb +173 -0
  65. data/spec/berkeley_library/marc/field_info/var_fields_spec.rb +112 -0
  66. data/spec/data/field_info/vf_046.txt +32 -0
  67. data/spec/data/field_info/vf_048.txt +112 -0
  68. data/spec/data/record-187888.xml +78 -0
  69. data/spec/marc_extensions/data_field_spec.rb +13 -0
  70. data/spec/marc_extensions/record_spec.rb +211 -0
  71. data/spec/spec_helper.rb +27 -0
  72. metadata +354 -0
@@ -0,0 +1,14 @@
1
module BerkeleyLibrary
  module Marc
    # Static gem metadata (name, authorship, version, etc.), exposed as
    # frozen string constants.
    class ModuleInfo
      # Gem name
      NAME = 'berkeley_library-marc'.freeze

      # Primary author and contact address
      AUTHOR = 'David Moles'.freeze
      AUTHOR_EMAIL = 'dmoles@berkeley.edu'.freeze

      # Short and long descriptions of the gem
      SUMMARY = 'MARC utilities for the UC Berkeley Library'.freeze
      DESCRIPTION = 'A gem providing MARC-related utility code and extensions to ruby-marc for the UC Berkeley Library'.freeze

      # License identifier
      LICENSE = 'MIT'.freeze

      # Current gem version
      VERSION = '0.2.0'.freeze

      # Project home page
      HOMEPAGE = 'https://github.com/BerkeleyLibrary/marc'.freeze
    end
  end
end
@@ -0,0 +1 @@
1
# Require every Ruby file in the sibling marc_extensions/ directory,
# in sorted (deterministic) path order.
extension_files = Dir.glob(File.expand_path('marc_extensions/*.rb', __dir__)).sort
extension_files.each { |extension_file| require extension_file }
@@ -0,0 +1,29 @@
1
+ require 'marc'
2
+ require 'marc_extensions/subfield'
3
+
4
module MARCExtensions
  # Convenience and deep-freeze helpers that are prepended to MARC::DataField.
  module DataFieldExtensions
    # @return [Array<String>] the code of each subfield, in subfield order
    def subfield_codes
      subfields.map(&:code)
    end

    # Reports whether the field is deeply frozen: the tag, both indicators,
    # the subfield collection itself, and every individual subfield must all
    # be frozen.
    #
    # @return [Boolean] true only if every component listed above is frozen
    def frozen?
      # Both checks must hold; previously the result of the first check was
      # discarded, so a field with frozen subfields but a mutable tag,
      # indicators, or subfield list was wrongly reported as frozen.
      [tag, indicator1, indicator2, subfields].all?(&:frozen?) &&
        subfields.all?(&:frozen?)
    end

    # Freezes the tag, both indicators, each subfield, and the subfield
    # collection. Note that this does not call the default Object#freeze
    # on the field object itself.
    #
    # @return [self]
    def freeze
      [tag, indicator1, indicator2].each(&:freeze)
      subfields.each(&:freeze)
      subfields.freeze
      self
    end
  end
end

module MARC
  # @see https://rubydoc.info/gems/marc/MARC/DataField RubyGems documentation
  class DataField
    prepend MARCExtensions::DataFieldExtensions
  end
end
@@ -0,0 +1,63 @@
1
+ require 'marc'
2
module MARCExtensions
  # Tag-ordered iteration, prepended to MARC::FieldMap.
  module FieldMapExtensions

    VALID_TAGS = ('000'..'999').freeze

    # Gets the specified fields in order by tag.
    #
    # @overload each_sorted_by_tag(tags, &block)
    #   Yields each specified field.
    #   @param tags [String, Enumerable<String>] A tag, range of tags, array of tags, or similar
    #   @yieldparam field [MARC::ControlField, MARC::DataField] Each field.
    # @overload each_sorted_by_tag(tags)
    #   An enumerator of the specified variable fields, sorted by tag.
    #   @param tags [String, Enumerable<String>] A tag, range of tags, array of tags, or similar
    #   @return [Enumerator::Lazy<MARC::ControlField, MARC::DataField>] the fields
    # @overload each_sorted_by_tag(&block)
    #   Yields all fields, sorted by tag.
    #   @yieldparam field [MARC::ControlField, MARC::DataField] Each field.
    # @overload each_sorted_by_tag
    #   An enumerator of all fields, sorted by tag.
    #   @return [Enumerator::Lazy<MARC::ControlField, MARC::DataField>] the fields
    def each_sorted_by_tag(tags = nil, &block)
      # Make sure the tag index (@tags) is current before reading it
      reindex unless @clean

      fields_in_tag_order = indices_for(tags).map { |index| self[index] }
      fields_in_tag_order.each(&block)
    end

    private

    # Field indices for the given tags in tag order, or for every tag
    # when +tags+ is nil.
    def indices_for(tags)
      return all_indices unless tags

      sorted_tag_array(tags)
        .lazy                                   # avoid building intermediate arrays
        .map { |tag| @tags[tag] }               # index list for each requested tag
        .reject { |indices| indices.nil? }      # skip tags with no fields
        .flat_map { |indices| indices }         # merge into one sequence of indices
    end

    # Indices of all fields, grouped by tag in ascending tag order.
    def all_indices
      @tags.keys.sort.each_with_object([]) do |tag, indices|
        indices.concat(@tags[tag])
      end
    end

    # Coerces +tags+ to an array; anything other than a Range is sorted.
    def sorted_tag_array(tags)
      tag_list = Array(tags)
      tags.is_a?(Range) ? tag_list : tag_list.sort
    end

  end
end

module MARC
  # @see https://rubydoc.info/gems/marc/MARC/FieldMap RubyGems documentation
  class FieldMap
    prepend MARCExtensions::FieldMapExtensions
  end
end
@@ -0,0 +1,100 @@
1
+ require 'marc'
2
+ require 'marc_extensions/field_map'
3
+ require 'marc_extensions/data_field'
4
+
5
module MARCExtensions
  # Tag-ordered iteration, control/data field accessors, and deep-freeze
  # helpers that are prepended to MARC::Record.
  module RecordExtensions

    # Gets the specified fields in order by tag.
    #
    # @see FieldMapExtensions#each_sorted_by_tag
    # @overload each_sorted_by_tag(tags, &block)
    #   Yields each specified field.
    #   @param tags [String, Enumerable<String>] A tag, range of tags, array of tags, or similar
    #   @yieldparam field [MARC::ControlField, MARC::DataField] Each field.
    # @overload each_sorted_by_tag(tags)
    #   An enumerator of the specified variable fields, sorted by tag.
    #   @param tags [String, Enumerable<String>] A tag, range of tags, array of tags, or similar
    #   @return [Enumerator::Lazy<MARC::ControlField, MARC::DataField>] the fields
    # @overload each_sorted_by_tag(&block)
    #   Yields all fields, sorted by tag.
    #   @yieldparam field [MARC::ControlField, MARC::DataField] Each field.
    # @overload each_sorted_by_tag
    #   An enumerator of all fields, sorted by tag.
    #   @return [Enumerator::Lazy<MARC::ControlField, MARC::DataField>] the fields
    def each_sorted_by_tag(tags = nil, &block)
      @fields.each_sorted_by_tag(tags, &block)
    end

    # Gets only the control fields (tag 000-009) from the record. (Note that
    # this method does not protect against pathological records with data
    # fields in the control field range.)
    #
    # @overload each_control_field
    #   An enumerator of the control fields.
    #   @return [Enumerator::Lazy<MARC::ControlField>] the fields
    # @overload each_control_field(&block)
    #   Yields each control field.
    #   @yieldparam field [MARC::ControlField] Each control field.
    def each_control_field(&block)
      # Strictly below 10: tag 010 is the first data field, not a control field.
      # Fields arrive sorted by tag, so we can stop at the first data field.
      each_sorted_by_tag.take_while { |df| df.tag.to_i < 10 }.each(&block)
    end

    # Gets only the data fields (tag 010-999) from the record. (Note that
    # this method does not protect against pathological records with control
    # fields in the data field range.)
    #
    # @overload each_data_field
    #   An enumerator of the data fields.
    #   @return [Enumerator::Lazy<MARC::DataField>] the fields
    # @overload each_data_field(&block)
    #   Yields each data field.
    #   @yieldparam field [MARC::DataField] Each data field.
    def each_data_field(&block)
      # 10 and above, inclusive, so that tag 010 is not silently dropped.
      each_sorted_by_tag.select { |df| df.tag.to_i >= 10 }.each(&block)
    end

    # Gets the data fields from the record and groups them by tag.
    #
    # @return [Hash<String, Array<MARC::DataField>>] a hash from tags to fields
    def data_fields_by_tag
      # noinspection RubyYardReturnMatch
      each_data_field.with_object({}) { |df, t2df| (t2df[df.tag] ||= []) << df }
    end

    # Gets only the data fields (tag 010-999) from the record, in tag order.
    # (Note that this method does not protect against pathological records
    # with control fields in the data field range.)
    #
    # @return [Array<DataField>] the data fields.
    def data_fields
      each_data_field.to_a
    end

    # Freezes the leader, each field, and the field collection. Note that
    # this does not call the default Object#freeze on the record itself.
    #
    # @return [self]
    def freeze
      leader.freeze
      fields.each(&:freeze)
      fields.freeze
      self
    end

    # @return [Boolean] true if the fields and leader are frozen
    def frozen?
      fields.frozen? && leader.frozen?
    end

    # TODO: use info from parsed documentation? or move to TIND-specific extension
    #
    # @return [String, nil] the value of the 001 control field, or nil if the record has none
    def record_id
      self['001']&.value
    end
  end
end

module MARC
  # @see https://rubydoc.info/gems/marc/MARC/Record RubyGems documentation
  class Record
    prepend MARCExtensions::RecordExtensions
  end
end
+ end
@@ -0,0 +1,21 @@
1
+ require 'marc'
2
+
3
module MARCExtensions
  # Deep-freeze helpers that are prepended to MARC::Subfield.
  module SubfieldExtensions
    # @return [Boolean] true if both the code and the value are frozen
    def frozen?
      code.frozen? && value.frozen?
    end

    # Freezes the code and the value (not the subfield object itself).
    #
    # @return [self]
    def freeze
      code.freeze
      value.freeze
      self
    end
  end
end

module MARC
  # @see https://rubydoc.info/gems/marc/MARC/Subfield RubyGems documentation
  class Subfield
    prepend MARCExtensions::SubfieldExtensions
  end
end
+ end
@@ -0,0 +1,19 @@
1
+ require 'marc'
2
+ require 'marc_extensions/record'
3
+
4
module MARCExtensions
  # Class-level additions that are prepended to MARC::XMLReader's singleton class.
  module XMLReaderClassExtensions
    # Creates a new reader for the given file, forwarding the +freeze+ flag
    # to the constructor as an option.
    #
    # @param file the source to read, passed through to +new+ unchanged
    # @param freeze [Boolean] passed to +new+ as the +:freeze+ option
    # @return the newly constructed reader
    def read(file, freeze: false)
      reader_options = { freeze: freeze }
      new(file, **reader_options)
    end
  end
end

module MARC
  # @see https://rubydoc.info/gems/marc/MARC/XMLReader RubyGems documentation
  class XMLReader
    singleton_class.prepend(MARCExtensions::XMLReaderClassExtensions)
  end
end
@@ -0,0 +1,8 @@
1
namespace :bundle do
  desc 'Updates the ruby-advisory-db then runs bundle-audit'
  task :audit do
    # Loaded lazily so the dependency is only needed when this task runs
    require 'bundler/audit/cli'

    # First refresh the advisory database, then run the check itself
    audit_commands = [
      ['update'],
      %w[check --ignore CVE-2015-9284]
    ]
    audit_commands.each { |command| Bundler::Audit::CLI.start command }
  end
end
@@ -0,0 +1,11 @@
1
+ require 'ci/reporter/rake/rspec'
2
+
3
# CI::Reporter settings: report generation defaults to on (but can be
# overridden from the environment); the report directory is always fixed.
ENV['GENERATE_REPORTS'] = 'true' unless ENV['GENERATE_REPORTS']
ENV['CI_REPORTS'] = 'artifacts/rspec'

desc 'Run all specs in spec directory, with coverage'
task coverage: ['ci:setup:rspec'] do
  # Default COVERAGE to on unless the caller already set it, then run the specs
  ENV['COVERAGE'] = 'true' unless ENV['COVERAGE']
  Rake::Task[:spec].invoke
end
data/rakelib/gem.rake ADDED
@@ -0,0 +1,54 @@
1
+ require 'rubygems/gem_runner'
2
+ require 'berkeley_library/marc/module_info'
3
+
4
gem_root_module = BerkeleyLibrary::Marc

# Build-path helpers, defined as singleton methods on the gem's root module
class << gem_root_module
  # @return [String] the parent directory of the directory containing this file
  def project_root
    @project_root ||= File.expand_path('..', __dir__)
  end

  # @return [String] the `artifacts` directory under the project root when
  #   the CI environment variable is set; otherwise the project root itself
  def artifacts_dir
    if ENV['CI']
      @artifacts_dir ||= File.join(project_root, 'artifacts')
    else
      project_root
    end
  end

  # @return [String] path to the single .gemspec file in the project root
  # @raise [ArgumentError] if there is no .gemspec file, or more than one
  def gemspec_file
    @gemspec_file ||= begin
      candidates = Dir.glob(File.expand_path('*.gemspec', project_root))
      raise ArgumentError, "Too many .gemspecs: #{candidates.join(', ')}" if candidates.size > 1
      raise ArgumentError, 'No .gemspec file found' if candidates.empty?

      candidates[0]
    end
  end

  # @return [String] the file name of the gemspec, without its directory
  def gemspec_basename
    File.basename(gemspec_file)
  end

  # @return [String] full path of the .gem file to build, named
  #   `<gemspec name>-<version>.gem` and placed in {#artifacts_dir}
  def output_file
    @output_file ||= begin
      name_without_ext = File.basename(gemspec_file, '.*')
      gem_version = self::ModuleInfo::VERSION
      gem_filename = "#{name_without_ext}-#{gem_version}.gem"
      File.join(artifacts_dir, gem_filename)
    end
  end

  # @return [String] the output file name alone, or, when the CI environment
  #   variable is set, prefixed with the artifacts directory's base name
  def output_file_relative
    return File.basename(output_file) unless ENV['CI']

    @output_file_relative ||= File.join(File.basename(artifacts_dir), File.basename(output_file))
  end
end

desc "Build #{gem_root_module.gemspec_basename} as #{gem_root_module.output_file_relative}"
task :gem do
  build_args = ['build', gem_root_module.gemspec_file, "--output=#{gem_root_module.output_file}"]
  Gem::GemRunner.new.run(build_args)
end
@@ -0,0 +1,18 @@
1
+ require 'rubocop'
2
+ require 'rubocop/rake_task'
3
+
4
desc 'Run rubocop with HTML output'
RuboCop::RakeTask.new(:rubocop) do |cop|
  # Report location can be overridden with the RUBOCOP_OUTPUT environment variable
  report_path = ENV.fetch('RUBOCOP_OUTPUT', 'artifacts/rubocop/index.html')
  puts "Writing RuboCop inspection report to #{report_path}"

  cop.verbose = false
  cop.formatters = %w[html]
  cop.options = ['--out', report_path]
end

desc 'Run RuboCop with auto-correct, and output results to console'
task :ra do
  # b/c we want console output, we can't just use `rubocop:auto_correct`
  cli_args = ['--safe-auto-correct']
  RuboCop::CLI.new.run(cli_args)
end
data/rakelib/spec.rake ADDED
@@ -0,0 +1,2 @@
1
+ require 'rspec/core/rake_task'
2
+ RSpec::Core::RakeTask.new(:spec)
data/spec/.rubocop.yml ADDED
@@ -0,0 +1,37 @@
1
+ inherit_from: ../.rubocop.yml
2
+
3
+ AllCops:
4
+ # Exclude generated files
5
+ Exclude:
6
+ - 'suite/**/*'
7
+
8
+ Style/MultilineBlockChain:
9
+ Enabled: false
10
+
11
+ Style/ParallelAssignment:
12
+ Enabled: false
13
+
14
+ Layout/LineLength:
15
+ Enabled: false
16
+
17
+ Metrics/AbcSize:
18
+ Enabled: false
19
+
20
+ Metrics/BlockLength:
21
+ Enabled: false
22
+
23
+ Metrics/ClassLength:
24
+ Enabled: false
25
+
26
+ Metrics/ModuleLength:
27
+ Enabled: false
28
+
29
+ Metrics/MethodLength:
30
+ Enabled: false
31
+
32
+ ############################################################
33
+ # Added in Rubocop 0.89
34
+
35
+ # Sometimes we're testing the operator
36
+ Lint/BinaryOperatorWithIdenticalOperands:
37
+ Enabled: false
@@ -0,0 +1,26 @@
1
+ require 'spec_helper'
2
+
3
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        describe VarFieldDef do
          describe 'to_s' do
            # Standard field definitions for tags 046 and 048, keyed by tag
            let(:fields) { %w[046 048].map { |t| [t, VarFields.standard.find { |vf| vf.tag == t }] }.to_h }

            it 'returns something like an EBCDList' do
              aggregate_failures('to_s') do
                fields.each do |tag, vf|
                  expected = File.read("spec/data/field_info/vf_#{tag}.txt").strip
                  actual = vf.to_s.strip
                  unless actual == expected
                    # Dump the actual output for manual diffing. Create tmp/
                    # first so a missing directory doesn't raise and hide the
                    # real assertion failure below.
                    Dir.mkdir('tmp') unless Dir.exist?('tmp')
                    File.write("tmp/actual_#{tag}.txt", actual)
                  end
                  expect(actual).to eq(expected)
                end
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,596 @@
1
+ require 'spec_helper'
2
+ require 'parslet/rig/rspec'
3
+
4
+ require 'berkeley_library/marc/field_info/var_fields/var_field_parser'
5
+
6
+ module BerkeleyLibrary
7
+ module Marc
8
+ module FieldInfo
9
+ module VarFields
10
+ describe VarFieldParser do
11
+ let(:parser) { VarFieldParser.new }
12
+ let(:printable_chars) { [0x21..0x3f, 0x5b..0x7b, 0x7d..0x7e].map(&:to_a).flatten.map { |cp| cp.chr(Encoding::UTF_8) } }
13
+
14
+ describe(:blank) do
15
+ it 'matches space' do
16
+ expect(parser.blank).to parse(' ', trace: true)
17
+ end
18
+
19
+ it 'matches tab' do
20
+ expect(parser.blank).to parse("\t", trace: true)
21
+ end
22
+ end
23
+
24
+ describe(:printable) do
25
+ it 'matches space' do
26
+ expect(parser.printable).to parse(' ', trace: true)
27
+ end
28
+
29
+ it 'matches tab' do
30
+ expect(parser.printable).to parse("\t", trace: true)
31
+ end
32
+
33
+ it 'matches ASCII printable, except hyphens' do
34
+ aggregate_failures do
35
+ nonhyphens = printable_chars.reject { |x| x == '-' }
36
+ nonhyphens.each { |c| expect(parser.printable).to parse(c, trace: true) }
37
+ end
38
+ end
39
+
40
+ it 'does not match terminal hyphens' do
41
+ expect(parser.printable).not_to parse('-', trace: true)
42
+ end
43
+ end
44
+
45
+ describe(:text) do
46
+ it 'matches text' do
47
+ expect(parser.text).to parse('Undefined', trace: true)
48
+ end
49
+
50
+ it 'matches internal hyphens' do
51
+ txts = ['SERIES ADDED ENTRY--PERSONAL NAME (R)', 'Variable control fields (002-009)']
52
+ txts.each { |txt| expect(parser.text).to parse(txt, trace: true) }
53
+ end
54
+
55
+ it 'matches leading hyphens' do
56
+ expect(parser.text).to parse('--Series Added Entry Fields (80X-830)', trace: true)
57
+ end
58
+
59
+ it 'does not match terminal hyphens' do
60
+ expect(parser.text).not_to parse('Series Added Entry Fields (80X-830)--', trace: true)
61
+ end
62
+ end
63
+
64
+ describe(:ln_br) do
65
+ it 'matches a Windows line break' do
66
+ expect(parser.ln_br).to parse("\r\n", trace: true)
67
+ end
68
+
69
+ it 'matches Unicode newline characters' do
70
+ chars = [0xa, 0xb, 0xc, 0xd, 0x85, 0x2028, 0x2029].map { |cp| cp.chr(Encoding::UTF_8) }
71
+ aggregate_failures do
72
+ chars.each { |c| expect(parser.ln_br).to parse(c, trace: true) }
73
+ end
74
+ end
75
+ end
76
+
77
+ describe(:eol) do
78
+ it 'matches a newline' do
79
+ expect(parser.eol).to parse("\n", trace: true)
80
+ end
81
+
82
+ it 'matches a newline with leading whitespace' do
83
+ expect(parser.eol).to parse(" \t\n", trace: true)
84
+ end
85
+ end
86
+
87
+ describe(:text) do
88
+ it 'matches an ASCII printable sequence' do
89
+ expect(parser.text).to parse(printable_chars.join, trace: true)
90
+ end
91
+ end
92
+
93
+ describe(:comment) do
94
+ it 'matches a comment' do
95
+ expect(parser.comment).to parse('// this is a comment', trace: true)
96
+ end
97
+
98
+ it 'captures leading whitespace' do
99
+ expect(parser.comment).to parse(" \t// this is a comment", trace: true)
100
+ end
101
+ end
102
+
103
+ describe(:nc) do
104
+ it 'matches a comment' do
105
+ expect(parser.nc).to parse('// this is a comment', trace: true)
106
+ end
107
+
108
+ it 'matches a newline' do
109
+ expect(parser.nc).to parse("\n", trace: true)
110
+ end
111
+
112
+ it 'matches a block of comments and whitespace' do
113
+ block = <<~TXT
114
+
115
+ // this is a comment
116
+
117
+ // here is another comment
118
+ // and another
119
+
120
+ TXT
121
+
122
+ expect(parser.nc).to parse(block, trace: true)
123
+ end
124
+
125
+ it 'matches the empty string' do
126
+ expect(parser.nc).to parse('', trace: true)
127
+ end
128
+
129
+ it 'matches indented comments' do
130
+ block = <<~TXT
131
+ // this is a comment
132
+
133
+ // this is another comment
134
+ // so is this
135
+ TXT
136
+
137
+ expect(parser.nc).to parse(block, trace: true)
138
+ end
139
+ end
140
+
141
+ describe(:ind_def) do
142
+ it 'parses an indicator definition' do
143
+ ind_def = '0 - No added entry'
144
+ expect(parser.ind_def).to parse(ind_def, trace: true)
145
+ end
146
+
147
+ it 'parses an undefined indicator definition' do
148
+ ind_def = '# - Undefined'
149
+ expect(parser.ind_def).to parse(ind_def, trace: true)
150
+ end
151
+
152
+ it 'parses an indicator value range' do
153
+ ind_def = '0-9 - Number of nonfiling characters present'
154
+ expect(parser.ind_def).to parse(ind_def, trace: true)
155
+ end
156
+ end
157
+
158
+ describe(:indicators) do
159
+ it 'matches an empty indicator definition' do
160
+ ind_def = <<~TXT.strip
161
+ Indicators
162
+ First - Undefined
163
+ # - Undefined
164
+ Second - Undefined
165
+ # - Undefined
166
+ TXT
167
+
168
+ expect(parser.indicators).to parse(ind_def, trace: true)
169
+ end
170
+
171
+ it 'parses typical indicators' do
172
+ ind_def = <<~TXT.strip
173
+ Indicators
174
+ First - Access method
175
+ # - No information provided
176
+ 0 - Email
177
+ 1 - FTP
178
+ 2 - Remote login (Telnet)
179
+ 3 - Dial-up
180
+ 4 - HTTP
181
+ 7 - Method specified in subfield $2
182
+ Second - Relationship
183
+ # - No information provided
184
+ 0 - Resource
185
+ 1 - Version of resource
186
+ 2 - Related resource
187
+ 8 - No display constant generated
188
+ TXT
189
+
190
+ expect(parser.indicators).to parse(ind_def, trace: true)
191
+ end
192
+
193
+ it 'parses obsolete indicators' do
194
+ ind_def = <<~TXT.strip
195
+ Indicators
196
+ First - Government jurisdiction (BK MP MU VM)[OBSOLETE]
197
+ 0 - United States [OBSOLETE]
198
+ 1 - Canada [OBSOLETE] [CAN/MARC only]
199
+ 2 - France [OBSOLETE] [CAN/MARC only]
200
+ First - Undefined
201
+ # - Undefined
202
+ Second - Undefined [OBSOLETE]
203
+ # - Undefined [OBSOLETE]
204
+ Second - Display constant controller
205
+ # - Copyright or legal deposit number
206
+ 8 - No display constant controller generated
207
+ TXT
208
+
209
+ expect(parser.indicators).to parse(ind_def, trace: true)
210
+ end
211
+
212
+ it 'parses obsolete indicators with no values' do
213
+ ind_def = <<~TXT.strip
214
+ Indicators
215
+ First - Number source
216
+ # - Source specified in subfield $2
217
+ # - Undefined (BK MP MU VM SE) [OBSOLETE]
218
+ 0 - Superintendent of Documents Classification System
219
+ 1 - Government of Canada Publications: Outline of Classification
220
+ First - Government jurisdiction (BK MP MU VM SE) [OBSOLETE]
221
+ Second - Undefined
222
+ # - Undefined
223
+ TXT
224
+
225
+ expect(parser.indicators).to parse(ind_def, trace: true)
226
+ end
227
+
228
+ it 'parses value ranges' do
229
+ ind_def = <<~TXT.strip
230
+ Indicators
231
+ First - Nonfiling characters
232
+ 0-9 - Number of nonfiling characters present
233
+ # - Nonfiling characters not specified [OBSOLETE]
234
+ Second - Undefined
235
+ # - Undefined
236
+ Second - Main entry/subject relationship (BK MU SE) [OBSOLETE]
237
+ TXT
238
+
239
+ expect(parser.indicators).to parse(ind_def, trace: true)
240
+ end
241
+ end
242
+
243
+ describe(:subfield_value) do
244
+ it 'parses a single-character value' do
245
+ expect(parser.subfield_value).to parse('1 - Form of name', trace: true)
246
+ end
247
+
248
+ it 'parses a multi-character value' do
249
+ expect(parser.subfield_value).to parse('isds/c - ISSN Canada', trace: true)
250
+ end
251
+ end
252
+
253
+ describe(:subfield_def) do
254
+ it 'parses a subfield code definition without values' do
255
+ subfield_def = '$4 - Relationship (R)'
256
+ expect(parser.subfield_def).to parse(subfield_def, trace: true)
257
+ end
258
+
259
+ it 'parses a subfield code definition with values' do
260
+ subfield_def = <<~TXT.strip
261
+ $7 - Control subfield (NR)
262
+ 0 - Type of main entry heading
263
+ 1 - Form of name
264
+ TXT
265
+
266
+ expect(parser.subfield_def).to parse(subfield_def, trace: true)
267
+ end
268
+
269
+ it "doesn't consume next vf definition" do
270
+ non_def = <<~TXT.strip
271
+ $8 - Field link and sequence number (R)
272
+
273
+ 011 - LINKING LIBRARY OF CONGRESS CONTROL NUMBER (NR) [OBSOLETE]
274
+ TXT
275
+ expect(parser.subfield_def).not_to parse(non_def, trace: true)
276
+ end
277
+ end
278
+
279
+ describe(:subfield_codes) do
280
+ it 'parses a typical set of subfield codes' do
281
+ subfield_codes = <<~TXT.strip
282
+ Subfield Codes
283
+ $a - Replacement title (R)
284
+ $i - Explanatory text (R)
285
+ $w - Replacement bibliographic record control number (R)
286
+ $6 - Linkage (NR)
287
+ $8 - Field link and sequence number (R)
288
+ TXT
289
+
290
+ expect(parser.subfield_codes).to parse(subfield_codes, trace: true)
291
+ end
292
+
293
+ it 'parses a range of subfield codes' do
294
+ subfield_codes = <<~TXT.strip
295
+ Subfield Codes
296
+ $a - Tag of the foreign MARC field (NR)
297
+ $b - Content of the foreign MARC field (NR)
298
+ $2 - Source of data (NR)
299
+ $a-z - Foreign MARC subfield (R)
300
+ $0-9 - Foreign MARC subfield (R)
301
+ TXT
302
+
303
+ expect(parser.subfield_codes).to parse(subfield_codes, trace: true)
304
+ end
305
+
306
+ it 'parses a range of subfield codes with missing hyphen before desc' do
307
+ subfield_codes = <<~TXT.strip
308
+ Subfield Codes
309
+ $6 - Linkage (NR)
310
+ $a-z Same as associated field
311
+ $0-5 Same as associated field
312
+ TXT
313
+
314
+ expect(parser.subfield_codes).to parse(subfield_codes, trace: true)
315
+ end
316
+ end
317
+
318
+ describe(:ivc_def) do
319
+ it 'parses instrument or voices codes' do
320
+ ivc_def = <<~TXT.strip
321
+ Instrument or Voices Codes
322
+ ba - Brass - Horn
323
+ bb - Brass--Trumpet
324
+ bc - Brass--Cornet
325
+ TXT
326
+
327
+ expect(parser.ivc_def).to parse(ivc_def, trace: true)
328
+ end
329
+ end
330
+
331
+ describe(:vf) do
332
+ it 'parses a typical field' do
333
+ vf = <<~TXT.strip
334
+ 886 - FOREIGN MARC INFORMATION FIELD (R)
335
+ Indicators
336
+ First - Type of field
337
+ 0 - Leader
338
+ 1 - Variable control fields (002-009)
339
+ 2 - Variable data fields (010-999)
340
+ Second - Undefined
341
+ # - Undefined
342
+ Subfield Codes
343
+ $a - Tag of the foreign MARC field (NR)
344
+ $b - Content of the foreign MARC field (NR)
345
+ $2 - Source of data (NR)
346
+ $a-z - Foreign MARC subfield (R)
347
+ $0-9 - Foreign MARC subfield (R)
348
+ TXT
349
+
350
+ expect(parser.vf).to parse(vf, trace: true)
351
+ end
352
+
353
+ it 'parses a field with no indicators or subfields' do
354
+ vf = '863 - ENUMERATION AND CHRONOLOGY--BASIC BIBLIOGRAPHIC UNIT (R)'
355
+
356
+ expect(parser.vf).to parse(vf, trace: true)
357
+ end
358
+
359
# Checks that a complete field definition including a trailing
# "Instrument or Voices Codes" list is accepted by the `vf` rule.
it 'parses a field with Instrument or Voices Codes' do
  vf = <<~TXT.strip
    048 - NUMBER OF MUSICAL INSTRUMENTS OR VOICES CODE (R)
    Indicators
    First - Undefined
    # - Undefined
    Second - Source of code
    # - MARC code
    7 - Source specified in subfield $2
    Subfield Codes
    $a - Performer or ensemble (R)
    $b - Soloist (R)
    $2 - Source of code (NR)
    $8 - Field link and sequence number (R)
    Instrument or Voices Codes
    ba - Brass - Horn
    bb - Brass--Trumpet
  TXT

  expect(parser.vf).to parse(vf, trace: true)
end
380
+
381
+ it 'parses a TIND field with bogus indicators' do
382
+ vf = <<~TXT.strip
383
+ 909 - OAI REPOSITORY METADATA (NR)
384
+ Indicators
385
+ First - ???
386
+ C - ??? // Yes, this should be invalid; yes, TIND does it anyway
387
+ Second - ???
388
+ 0 - ???
389
+ Subfield Codes
390
+ $o - OAI URI for the record (oai:<hostname>:<record ID>) (NR)
391
+ $p - setSpec for current valid sets (NR)
392
+ $q - setSpec for previously valid sets (NR)
393
+ TXT
394
+
395
+ expect(parser.vf).to parse(vf, trace: true)
396
+ end
397
+ end
398
+
399
+ describe(:section_header) do
400
+ it 'matches a section header' do
401
+ expect(parser.section_header).to parse('--Number and Code Fields (01X-04X)--', trace: true)
402
+ end
403
+ end
404
+
405
+ describe(:section) do
406
+ it 'matches a section with header' do
407
+ section = <<~TXT.strip
408
+ --Number and Code Fields (01X-04X)--
409
+ 010 - LIBRARY OF CONGRESS CONTROL NUMBER (NR)
410
+ Indicators
411
+ First - Undefined
412
+ # - Undefined
413
+ Second - Undefined
414
+ # - Undefined
415
+ Subfield Codes
416
+ $a - LC control number (NR)
417
+ $b - NUCMC control number (R)
418
+ $z - Canceled/invalid LC control number (R)
419
+ $8 - Field link and sequence number (R)
420
+
421
+ 011 - LINKING LIBRARY OF CONGRESS CONTROL NUMBER (NR) [OBSOLETE]
422
+ Indicators
423
+ First - Undefined
424
+ # - Undefined
425
+ Second - Undefined
426
+ # - Undefined
427
+ Subfield Codes
428
+ $a - LINKING LC control number (R)
429
+ TXT
430
+
431
+ expect(parser.section).to parse(section, trace: true)
432
+ end
433
+
434
+ it 'matches a section without header' do
435
+ section = <<~TXT.strip
436
+ 852 - LOCATION (R)
437
+ Indicators
438
+ First - Undefined
439
+ # - Undefined
440
+ Second - Undefined
441
+ # - Undefined
442
+ Subfield Codes
443
+ $c - Location/Archive/Repository
444
+
445
+ 901 - IDENTIFIERS [INTERNAL] (NR) [REQUIRED WHEN APPLICABLE]
446
+ Indicators
447
+ First - Undefined
448
+ # - Undefined
449
+ Second - Undefined
450
+ # - Undefined
451
+ Subfield Codes
452
+ $a - Ark identifier for METS file [REQUIRED WHEN APPLICABLE]
453
+ $f - Ark identifier for finding aid
454
+ $g - PJID:DBID from GenDB [REQUIRED WHEN APPLICABLE]
455
+ $m - Millennium record number
456
+ $o - OCLC Number
457
+ TXT
458
+
459
+ expect(parser.section).to parse(section, trace: true)
460
+ end
461
+ end
462
+
463
+ describe(:list) do
464
+ it 'parses multiple sections' do
465
+ sections = <<~TXT.strip
466
+ --Number and Code Fields (01X-04X)--
467
+ 010 - LIBRARY OF CONGRESS CONTROL NUMBER (NR)
468
+ Indicators
469
+ First - Undefined
470
+ # - Undefined
471
+ Second - Undefined
472
+ # - Undefined
473
+ Subfield Codes
474
+ $a - LC control number (NR)
475
+ $8 - Field link and sequence number (R)
476
+
477
+ 048 - NUMBER OF MUSICAL INSTRUMENTS OR VOICES CODE (R)
478
+ Indicators
479
+ First - Undefined
480
+ # - Undefined
481
+ Second - Source of code
482
+ 7 - Source specified in subfield $2
483
+ Subfield Codes
484
+ $a - Performer or ensemble (R)
485
+ $8 - Field link and sequence number (R)
486
+ Instrument or Voices Codes
487
+ bc - Brass--Cornet
488
+ bd - Brass--Trombone
489
+
490
+
491
+ --Classification and Call Number Fields (05X-08X)--
492
+ 050 - LIBRARY OF CONGRESS CALL NUMBER (R)
493
+ Indicators
494
+ First - Existence in LC collection
495
+ 0 - Item is in LC
496
+ 1 - Item is not in LC
497
+ Second - Source of call number
498
+ 0 - Assigned by LC
499
+ 4 - Assigned by agency other than LC
500
+ Second - Series call number (SE) [OBSOLETE]
501
+ 0 - No series involved
502
+ 1 - Main series
503
+ Subfield Codes
504
+ $a - Classification number (R)
505
+ $b - Item number (NR)
506
+ TXT
507
+
508
+ expect(parser.list).to parse(sections, trace: true)
509
+ end
510
+
511
+ end
512
+
513
+ describe(:parse) do
514
+ it 'parses the standard list' do
515
+ list = File.read(VarFields::PATH_STANDARD)
516
+ expect(parser).to parse(list, trace: true)
517
+
518
+ parse_tree = parser.parse(list)
519
+ expect(parse_tree.keys).to contain_exactly(:doc_comments, :sections)
520
+
521
+ doc_comments = parse_tree[:doc_comments]
522
+ expected_comments = [
523
+ 'MARC 21 Format for Bibliographic Data Field List',
524
+ 'Adapted from https://www.loc.gov/marc/bibliographic/ecbdlist.html',
525
+ 'Retrieved 2021-01-13'
526
+ ]
527
+
528
+ expect(doc_comments.size).to eq(3)
529
+ expect(doc_comments.map { |c| c[:comment] }).to eq(expected_comments)
530
+
531
+ sections = parse_tree[:sections]
532
+ expected_sections = [
533
+ 'Number and Code Fields (01X-04X)',
534
+ 'Classification and Call Number Fields (05X-08X)',
535
+ 'Main Entry Fields (1XX)',
536
+ 'Title and Title-Related Fields (20X-24X)',
537
+ 'Edition, Imprint, etc. Fields 250-270',
538
+ 'Physical Description, etc. Fields (3XX)',
539
+ 'Series Statement Fields (4XX)',
540
+ 'Note Fields (Part 1: 50X-53X)',
541
+ 'Note Fields (Part 2: 53X-58X)',
542
+ '59X - LOCAL NOTES',
543
+ 'Subject Access Fields (6XX)',
544
+ 'Added Entry Fields (70X-75X)',
545
+ 'Linking Entry Fields (76X-78X)',
546
+ 'Series Added Entry Fields (80X-830)',
547
+ 'Holdings, Location, Alternate Graphics, etc. Fields (841-88X)'
548
+ ]
549
+ expect(sections.map { |s| s[:desc] }).to eq(expected_sections)
550
+ end
551
+
552
+ describe 'non-standard lists' do
553
+ data_dir = File.dirname(VarFields::PATH_STANDARD)
554
+ Dir.glob(File.expand_path('var_fields_*.txt', data_dir)).each do |p|
555
+ next if p == VarFields::PATH_STANDARD
556
+
557
+ basename = File.basename(p)
558
+ it "parses #{basename}" do
559
+ list = File.read(p)
560
+ expect(parser).to parse(list, trace: true)
561
+
562
+ # parse_tree = parser.parse(list)
563
+ # const_name = basename.sub(/^var_fields_/, '').sub(/\.txt/, '_parsed').upcase
564
+ # header = <<~HEADER
565
+ # module UCBLIT
566
+ # module Marc
567
+ # module FieldInfo
568
+ # module VarFields
569
+ # #{const_name} =
570
+ # HEADER
571
+ # footer = <<~FOOTER
572
+ # end
573
+ # end
574
+ # end
575
+ # end
576
+ # FOOTER
577
+ #
578
+ # parse_tree_src = StringIO.new.tap do |out|
579
+ # out.puts(header)
580
+ # PP.pp(parse_tree, out)
581
+ # out.puts(footer)
582
+ # end.string.gsub(/@[0-9]+/, '')
583
+ #
584
+ # parsed_file = p.sub(/\.txt/, '_parsed.rb')
585
+ # File.write(parsed_file, parse_tree_src)
586
+ end
587
+ end
588
+ end
589
+
590
+ end
591
+
592
+ end
593
+ end
594
+ end
595
+ end
596
+ end