berkeley_library-marc 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/build.yml +18 -0
  3. data/.gitignore +388 -0
  4. data/.idea/inspectionProfiles/Project_Default.xml +20 -0
  5. data/.idea/marc.iml +101 -0
  6. data/.idea/misc.xml +4 -0
  7. data/.idea/modules.xml +8 -0
  8. data/.idea/vcs.xml +6 -0
  9. data/.rubocop.yml +334 -0
  10. data/.ruby-version +1 -0
  11. data/.simplecov +8 -0
  12. data/.yardopts +1 -0
  13. data/CHANGES.md +12 -0
  14. data/Dockerfile +57 -0
  15. data/Gemfile +3 -0
  16. data/Jenkinsfile +18 -0
  17. data/LICENSE.md +21 -0
  18. data/README.md +4 -0
  19. data/Rakefile +20 -0
  20. data/berkeley_library-marc.gemspec +42 -0
  21. data/docker-compose.yml +15 -0
  22. data/lib/.rubocop.yml +6 -0
  23. data/lib/berkeley_library/marc.rb +3 -0
  24. data/lib/berkeley_library/marc/field_info.rb +1 -0
  25. data/lib/berkeley_library/marc/field_info/ctrl_fields/data/ctrl_fields_standard.txt +2143 -0
  26. data/lib/berkeley_library/marc/field_info/leader/data/leader_standard.txt +87 -0
  27. data/lib/berkeley_library/marc/field_info/var_fields.rb +46 -0
  28. data/lib/berkeley_library/marc/field_info/var_fields/data.rb +4 -0
  29. data/lib/berkeley_library/marc/field_info/var_fields/data/mapping-orig.tsv +265 -0
  30. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_berkeley_9xx.txt +53 -0
  31. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_berkeley_9xx_parsed.rb +51 -0
  32. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_standard.txt +5458 -0
  33. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_standard_parsed.rb +6577 -0
  34. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_tind_reserved.txt +44 -0
  35. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_tind_reserved_parsed.rb +30 -0
  36. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_ucblit_tind.txt +105 -0
  37. data/lib/berkeley_library/marc/field_info/var_fields/data/var_fields_ucblit_tind_parsed.rb +114 -0
  38. data/lib/berkeley_library/marc/field_info/var_fields/ind_def.rb +39 -0
  39. data/lib/berkeley_library/marc/field_info/var_fields/ind_val_def.rb +27 -0
  40. data/lib/berkeley_library/marc/field_info/var_fields/instrument_or_voices_code.rb +26 -0
  41. data/lib/berkeley_library/marc/field_info/var_fields/obsolescible.rb +55 -0
  42. data/lib/berkeley_library/marc/field_info/var_fields/section.rb +50 -0
  43. data/lib/berkeley_library/marc/field_info/var_fields/subfield_def.rb +50 -0
  44. data/lib/berkeley_library/marc/field_info/var_fields/subfield_val.rb +24 -0
  45. data/lib/berkeley_library/marc/field_info/var_fields/var_field_def.rb +62 -0
  46. data/lib/berkeley_library/marc/field_info/var_fields/var_field_list.rb +43 -0
  47. data/lib/berkeley_library/marc/field_info/var_fields/var_field_parser.rb +136 -0
  48. data/lib/berkeley_library/marc/field_info/var_fields/var_field_transform.rb +160 -0
  49. data/lib/berkeley_library/marc/module_info.rb +14 -0
  50. data/lib/marc_extensions.rb +1 -0
  51. data/lib/marc_extensions/data_field.rb +29 -0
  52. data/lib/marc_extensions/field_map.rb +63 -0
  53. data/lib/marc_extensions/record.rb +100 -0
  54. data/lib/marc_extensions/subfield.rb +21 -0
  55. data/lib/marc_extensions/xml_reader.rb +19 -0
  56. data/rakelib/bundle.rake +8 -0
  57. data/rakelib/coverage.rake +11 -0
  58. data/rakelib/gem.rake +54 -0
  59. data/rakelib/rubocop.rake +18 -0
  60. data/rakelib/spec.rake +2 -0
  61. data/spec/.rubocop.yml +37 -0
  62. data/spec/berkeley_library/marc/field_info/var_fields/var_field_def_spec.rb +26 -0
  63. data/spec/berkeley_library/marc/field_info/var_fields/var_field_parser_spec.rb +596 -0
  64. data/spec/berkeley_library/marc/field_info/var_fields/var_field_transform_spec.rb +173 -0
  65. data/spec/berkeley_library/marc/field_info/var_fields_spec.rb +112 -0
  66. data/spec/data/field_info/vf_046.txt +32 -0
  67. data/spec/data/field_info/vf_048.txt +112 -0
  68. data/spec/data/record-187888.xml +78 -0
  69. data/spec/marc_extensions/data_field_spec.rb +13 -0
  70. data/spec/marc_extensions/record_spec.rb +211 -0
  71. data/spec/spec_helper.rb +27 -0
  72. metadata +354 -0
@@ -0,0 +1,14 @@
1
+ module BerkeleyLibrary
2
+ module Marc
3
+ class ModuleInfo
4
+ NAME = 'berkeley_library-marc'.freeze
5
+ AUTHOR = 'David Moles'.freeze
6
+ AUTHOR_EMAIL = 'dmoles@berkeley.edu'.freeze
7
+ SUMMARY = 'MARC utilities for the UC Berkeley Library'.freeze
8
+ DESCRIPTION = 'A gem providing MARC-related utility code and extensions to ruby-marc for the UC Berkeley Library'.freeze
9
+ LICENSE = 'MIT'.freeze
10
+ VERSION = '0.2.0'.freeze
11
+ HOMEPAGE = 'https://github.com/BerkeleyLibrary/marc'.freeze
12
+ end
13
+ end
14
+ end
@@ -0,0 +1 @@
1
# Require every Ruby file under marc_extensions/ (next to this file), in sorted order.
extension_files = Dir.glob(File.expand_path('marc_extensions/*.rb', __dir__)).sort
extension_files.each { |path| require path }
@@ -0,0 +1,29 @@
1
+ require 'marc'
2
+ require 'marc_extensions/subfield'
3
+
4
module MARCExtensions
  # Helper methods for data-field-like objects, i.e. anything responding to
  # `tag`, `indicator1`, `indicator2`, and `subfields`.
  module DataFieldExtensions
    # @return [Array] the `code` of each subfield, in subfield order
    def subfield_codes
      subfields.map(&:code)
    end

    # Reports whether the field is frozen in depth: the tag, both indicators,
    # the subfield list itself, and every individual subfield must be frozen.
    #
    # @return [Boolean] true only if all of the above are frozen
    def frozen?
      # Both checks must contribute to the result; evaluating the first one as
      # a bare statement would silently discard it.
      [tag, indicator1, indicator2, subfields].all?(&:frozen?) &&
        subfields.all?(&:frozen?)
    end

    # Freezes the tag, the indicators, each subfield, and the subfield list.
    # Does not call `super`, so the receiver object itself is left unfrozen.
    #
    # @return [self]
    def freeze
      [tag, indicator1, indicator2].each(&:freeze)
      subfields.each(&:freeze)
      subfields.freeze
      self
    end
  end
end
23
+
24
module MARC
  # Reopened to prepend the data field extensions, so that the extension
  # methods are found ahead of the class's own.
  #
  # @see https://rubydoc.info/gems/marc/MARC/DataField RubyGems documentation
  class DataField
    prepend MARCExtensions::DataFieldExtensions
  end
end
@@ -0,0 +1,63 @@
1
+ require 'marc'
2
module MARCExtensions
  # Tag-ordered iteration for field-map-like objects.
  #
  # The host object is expected to be indexable by position (`self[i]`) and to
  # provide a `reindex` method, a `@clean` flag, and a `@tags` hash mapping each
  # tag to the array of positions holding fields with that tag.
  module FieldMapExtensions

    VALID_TAGS = ('000'..'999').freeze

    # Gets the specified fields in order by tag.
    #
    # @overload each_sorted_by_tag(tags, &block)
    #   Yields each specified field.
    #   @param tags [String, Enumerable<String>] A tag, range of tags, array of tags, or similar
    #   @yieldparam field [MARC::ControlField, MARC::DataField] Each field.
    # @overload each_sorted_by_tag(tags)
    #   A lazy enumerator of the specified variable fields, sorted by tag.
    #   @param tags [String, Enumerable<String>] A tag, range of tags, array of tags, or similar
    #   @return [Enumerator::Lazy<MARC::ControlField, MARC::DataField>] the fields
    # @overload each_sorted_by_tag(&block)
    #   Yields all fields, sorted by tag.
    #   @yieldparam field [MARC::ControlField, MARC::DataField] Each field.
    # @overload each_sorted_by_tag
    #   An enumerator of all fields, sorted by tag. Unlike the `tags` overload,
    #   this one is built from an eagerly computed array and is not lazy.
    #   @return [Enumerator<MARC::ControlField, MARC::DataField>] the fields
    def each_sorted_by_tag(tags = nil, &block)
      # The tag index is only trustworthy when the map is marked clean.
      reindex unless @clean

      indices_for(tags).map { |index| self[index] }.each(&block)
    end

    private

    # Positions of the fields matching `tags`, in tag order; all positions
    # (eagerly, as an Array) when `tags` is nil, otherwise a lazy enumerator.
    def indices_for(tags)
      return all_indices unless tags

      sorted_tag_array(tags)
        .lazy                      # prevent unnecessary allocations
        .map { |tag| @tags[tag] }  # get indices for each tag
        .reject(&:nil?)            # ignoring any tags we don't have fields for
        .flat_map(&:itself)        # flatten list of indices -- equiv. Array#flatten
    end

    # Positions of every indexed field, grouped by tag in ascending tag order.
    def all_indices
      @tags.keys.sort.flat_map { |tag| @tags[tag] }
    end

    # Coerces `tags` to an array; ranges keep their own order, anything else
    # is sorted.
    def sorted_tag_array(tags)
      return Array(tags) if tags.is_a?(Range)

      Array(tags).sort
    end

  end
end
57
+
58
module MARC
  # Reopened to prepend the field map extensions, so that the extension
  # methods are found ahead of the class's own.
  #
  # @see https://rubydoc.info/gems/marc/MARC/FieldMap RubyGems documentation
  class FieldMap
    prepend MARCExtensions::FieldMapExtensions
  end
end
@@ -0,0 +1,100 @@
1
+ require 'marc'
2
+ require 'marc_extensions/field_map'
3
+ require 'marc_extensions/data_field'
4
+
5
module MARCExtensions
  # Helper methods for record-like objects: tag-ordered iteration, control/data
  # field partitioning, deep freezing, and record ID lookup.
  #
  # The host object is expected to provide `fields`, `leader`, `[]` lookup by
  # tag, and a `@fields` collection responding to `each_sorted_by_tag`.
  module RecordExtensions

    # Gets the specified fields in order by tag. Delegates to the record's
    # field collection.
    #
    # @see FieldMapExtensions#each_sorted_by_tag
    # @overload each_sorted_by_tag(tags, &block)
    #   Yields each specified field.
    #   @param tags [String, Enumerable<String>] A tag, range of tags, array of tags, or similar
    #   @yieldparam field [MARC::ControlField, MARC::DataField] Each field.
    # @overload each_sorted_by_tag(tags)
    #   An enumerator of the specified variable fields, sorted by tag.
    #   @param tags [String, Enumerable<String>] A tag, range of tags, array of tags, or similar
    #   @return [Enumerator<MARC::ControlField, MARC::DataField>] the fields
    # @overload each_sorted_by_tag(&block)
    #   Yields all fields, sorted by tag.
    #   @yieldparam field [MARC::ControlField, MARC::DataField] Each field.
    # @overload each_sorted_by_tag
    #   An enumerator of all fields, sorted by tag.
    #   @return [Enumerator<MARC::ControlField, MARC::DataField>] the fields
    def each_sorted_by_tag(tags = nil, &block)
      @fields.each_sorted_by_tag(tags, &block)
    end

    # Gets only the control fields (tag 000-009) from the record. (Note that
    # this method does not protect against pathological records with data
    # fields in the control field range.)
    #
    # @overload each_control_field
    #   An enumerator of the control fields.
    #   @return [Enumerator<MARC::ControlField>] the fields
    # @overload each_control_field(&block)
    #   Yields each control field.
    #   @yieldparam field [MARC::ControlField] Each control field.
    def each_control_field(&block)
      # Fields arrive sorted by tag, so iteration can stop at the first tag
      # outside 000-009. Tag 010 is a data field and must be excluded here.
      each_sorted_by_tag.take_while { |field| field.tag.to_i < 10 }.each(&block)
    end

    # Gets only the data fields (tag 010-999) from the record. (Note that
    # this method does not protect against pathological records with control
    # fields in the data field range.)
    #
    # @overload each_data_field
    #   An enumerator of the data fields.
    #   @return [Enumerator<MARC::DataField>] the fields
    # @overload each_data_field(&block)
    #   Yields each data field.
    #   @yieldparam field [MARC::DataField] Each data field.
    def each_data_field(&block)
      # Inclusive lower bound: tag 010 is the first data field.
      each_sorted_by_tag.select { |field| field.tag.to_i >= 10 }.each(&block)
    end

    # Gets the data fields from the record and groups them by tag.
    #
    # @return [Hash<String, Array<MARC::DataField>>] a hash from tags to fields
    def data_fields_by_tag
      # noinspection RubyYardReturnMatch
      each_data_field.with_object({}) { |field, by_tag| (by_tag[field.tag] ||= []) << field }
    end

    # Gets only the data fields (tag 010-999) from the record. (Note that
    # this method does not protect against pathological records with control
    # fields in the data field range.)
    #
    # @return [Array<DataField>] the data fields.
    def data_fields
      data_fields_by_tag.values.flatten
    end

    # Freezes the leader, each field, and the field collection. Does not call
    # `super`, so the receiver object itself is left unfrozen.
    #
    # @return [self]
    def freeze
      leader.freeze
      fields.each(&:freeze)
      fields.freeze
      self
    end

    # @return [Boolean] true if the fields and leader are frozen
    def frozen?
      fields.frozen? && leader.frozen?
    end

    # TODO: use info from parsed documentation? or move to TIND-specific extension
    #
    # @return [Object, nil] the value of the 001 field, or nil if the record has none
    def record_id
      cf_001 = self['001']
      cf_001.value if cf_001
    end
  end
end
94
+
95
module MARC
  # Reopened to prepend the record extensions, so that the extension methods
  # are found ahead of the class's own.
  #
  # @see https://rubydoc.info/gems/marc/MARC/Record RubyGems documentation
  class Record
    prepend MARCExtensions::RecordExtensions
  end
end
@@ -0,0 +1,21 @@
1
+ require 'marc'
2
+
3
module MARCExtensions
  # Freezing helpers for subfield-like objects, i.e. anything responding to
  # `code` and `value`.
  module SubfieldExtensions
    # @return [Boolean] true if both the code and the value are frozen
    def frozen?
      code.frozen? && value.frozen?
    end

    # Freezes the code and the value. Does not call `super`, so the receiver
    # object itself is left unfrozen.
    #
    # @return [self]
    def freeze
      code.freeze
      value.freeze
      self
    end
  end
end
15
+
16
module MARC
  # Reopened to prepend the subfield extensions, so that the extension methods
  # are found ahead of the class's own.
  #
  # @see https://rubydoc.info/gems/marc/MARC/Subfield RubyGems documentation
  class Subfield
    prepend MARCExtensions::SubfieldExtensions
  end
end
@@ -0,0 +1,19 @@
1
+ require 'marc'
2
+ require 'marc_extensions/record'
3
+
4
module MARCExtensions
  # Class-level extension adding a `read` factory method with a `freeze:`
  # keyword option.
  module XMLReaderClassExtensions
    # Constructs a new instance of the receiving class for `file`.
    #
    # @param file the source handed to the constructor unchanged
    # @param freeze [Boolean] forwarded to the constructor as the `:freeze` option
    # @return a new instance of the receiving class
    def read(file, freeze: false)
      reader_options = { freeze: freeze }
      new(file, **reader_options)
    end
  end
end
11
+
12
module MARC
  # Reopened to prepend the class-level extensions onto the reader's singleton
  # class, so that `XMLReader.read` resolves to the extension method first.
  #
  # @see https://rubydoc.info/gems/marc/MARC/XMLReader RubyGems documentation
  class XMLReader
    singleton_class.prepend(MARCExtensions::XMLReaderClassExtensions)
  end
end
@@ -0,0 +1,8 @@
1
namespace :bundle do
  desc 'Updates the ruby-advisory-db then runs bundle-audit'
  task :audit do
    # Loaded lazily, inside the task, so the rakefile loads without bundler-audit.
    require 'bundler/audit/cli'

    # Run `update` first, then `check` (with one advisory explicitly ignored).
    [['update'], %w[check --ignore CVE-2015-9284]].each do |cli_args|
      Bundler::Audit::CLI.start(cli_args)
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require 'ci/reporter/rake/rspec'
2
+
3
# Configure CI::Reporter report generation: GENERATE_REPORTS keeps any value
# already present in the environment, CI_REPORTS is always overwritten.
ENV['GENERATE_REPORTS'] = 'true' unless ENV['GENERATE_REPORTS']
ENV['CI_REPORTS'] = 'artifacts/rspec'

desc 'Run all specs in spec directory, with coverage'
task coverage: %w[ci:setup:rspec] do
  # Respect an explicit COVERAGE setting; otherwise switch it on.
  ENV['COVERAGE'] = 'true' unless ENV['COVERAGE']
  Rake::Task[:spec].invoke
end
data/rakelib/gem.rake ADDED
@@ -0,0 +1,54 @@
1
+ require 'rubygems/gem_runner'
2
+ require 'berkeley_library/marc/module_info'
3
+
4
gem_root_module = BerkeleyLibrary::Marc

# Path helpers for the gem build, defined as singleton methods on the gem's
# root module. Results are memoized in instance variables on that module.
class << gem_root_module
  # @return [String] the parent of the directory containing this file
  def project_root
    @project_root ||= File.expand_path('..', __dir__)
  end

  # @return [String] `<project root>/artifacts` when ENV['CI'] is set,
  #   otherwise the project root itself
  def artifacts_dir
    return project_root unless ENV['CI']

    @artifacts_dir ||= File.join(project_root, 'artifacts')
  end

  # @return [String] path to the single .gemspec file in the project root
  # @raise [ArgumentError] if there is no .gemspec there, or more than one
  def gemspec_file
    return @gemspec_file if @gemspec_file

    candidates = Dir.glob(File.expand_path('*.gemspec', project_root))
    raise ArgumentError, 'No .gemspec file found' if candidates.empty?
    raise ArgumentError, "Too many .gemspecs: #{candidates.join(', ')}" if candidates.size > 1

    @gemspec_file = candidates[0]
  end

  # @return [String] the gemspec's file name, without directories
  def gemspec_basename
    File.basename(gemspec_file)
  end

  # @return [String] path of the .gem file to build, inside the artifacts
  #   directory, named `<gemspec name>-<ModuleInfo::VERSION>.gem`
  def output_file
    return @output_file if @output_file

    gem_name = File.basename(gemspec_file, '.*')
    gem_version = self::ModuleInfo::VERSION
    @output_file = File.join(artifacts_dir, "#{gem_name}-#{gem_version}.gem")
  end

  # @return [String] the output file's bare name, or (when ENV['CI'] is set)
  #   that name prefixed with the artifacts directory's own name
  def output_file_relative
    gem_basename = File.basename(output_file)
    return gem_basename unless ENV['CI']

    @output_file_relative ||= File.join(File.basename(artifacts_dir), gem_basename)
  end
end
49
+
50
# The description is interpolated when the rakefile loads, so the gemspec
# lookup happens at load time rather than when the task runs.
desc "Build #{gem_root_module.gemspec_basename} as #{gem_root_module.output_file_relative}"
task :gem do
  gemspec_path = gem_root_module.gemspec_file
  output_flag = "--output=#{gem_root_module.output_file}"

  # Equivalent to running `gem build <gemspec> --output=<file>`.
  Gem::GemRunner.new.run(['build', gemspec_path, output_flag])
end
@@ -0,0 +1,18 @@
1
+ require 'rubocop'
2
+ require 'rubocop/rake_task'
3
+
4
desc 'Run rubocop with HTML output'
RuboCop::RakeTask.new(:rubocop) do |rubocop_task|
  # Report location: RUBOCOP_OUTPUT if set, otherwise the default artifacts path.
  report_path = ENV.fetch('RUBOCOP_OUTPUT', 'artifacts/rubocop/index.html')
  puts "Writing RuboCop inspection report to #{report_path}"

  rubocop_task.verbose = false
  rubocop_task.formatters = %w[html]
  rubocop_task.options = ['--out', report_path]
end
13
+
14
desc 'Run RuboCop with auto-correct, and output results to console'
task :ra do
  # b/c we want console output, we can't just use `rubocop:auto_correct`;
  # invoke the RuboCop CLI directly instead.
  cli_args = %w[--safe-auto-correct]
  RuboCop::CLI.new.run(cli_args)
end
data/rakelib/spec.rake ADDED
@@ -0,0 +1,2 @@
1
+ require 'rspec/core/rake_task'
2
+ RSpec::Core::RakeTask.new(:spec)
data/spec/.rubocop.yml ADDED
@@ -0,0 +1,37 @@
1
+ inherit_from: ../.rubocop.yml
2
+
3
+ AllCops:
4
+ # Exclude generated files
5
+ Exclude:
6
+ - 'suite/**/*'
7
+
8
+ Style/MultilineBlockChain:
9
+ Enabled: false
10
+
11
+ Style/ParallelAssignment:
12
+ Enabled: false
13
+
14
+ Layout/LineLength:
15
+ Enabled: false
16
+
17
+ Metrics/AbcSize:
18
+ Enabled: false
19
+
20
+ Metrics/BlockLength:
21
+ Enabled: false
22
+
23
+ Metrics/ClassLength:
24
+ Enabled: false
25
+
26
+ Metrics/ModuleLength:
27
+ Enabled: false
28
+
29
+ Metrics/MethodLength:
30
+ Enabled: false
31
+
32
+ ############################################################
33
+ # Added in Rubocop 0.89
34
+
35
+ # Sometimes we're testing the operator
36
+ Lint/BinaryOperatorWithIdenticalOperands:
37
+ Enabled: false
@@ -0,0 +1,26 @@
1
+ require 'spec_helper'
2
+
3
module BerkeleyLibrary
  module Marc
    module FieldInfo
      module VarFields
        describe VarFieldDef do
          describe 'to_s' do
            # Standard definitions for the tags that have expected-output files.
            let(:fields) do
              %w[046 048].map do |t|
                [t, VarFields.standard.find { |vf| vf.tag == t }]
              end.to_h
            end

            it 'returns something like an EBCDList' do
              aggregate_failures('to_s') do
                fields.each do |tag, vf|
                  expected = File.read("spec/data/field_info/vf_#{tag}.txt").strip
                  actual = vf.to_s.strip

                  # On mismatch, write the actual output to a file for inspection.
                  File.write("tmp/actual_#{tag}.txt", actual) if actual != expected
                  expect(actual).to eq(expected)
                end
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,596 @@
1
+ require 'spec_helper'
2
+ require 'parslet/rig/rspec'
3
+
4
+ require 'berkeley_library/marc/field_info/var_fields/var_field_parser'
5
+
6
+ module BerkeleyLibrary
7
+ module Marc
8
+ module FieldInfo
9
+ module VarFields
10
+ describe VarFieldParser do
11
+ let(:parser) { VarFieldParser.new }
12
+ let(:printable_chars) { [0x21..0x3f, 0x5b..0x7b, 0x7d..0x7e].map(&:to_a).flatten.map { |cp| cp.chr(Encoding::UTF_8) } }
13
+
14
+ describe(:blank) do
15
+ it 'matches space' do
16
+ expect(parser.blank).to parse(' ', trace: true)
17
+ end
18
+
19
+ it 'matches tab' do
20
+ expect(parser.blank).to parse("\t", trace: true)
21
+ end
22
+ end
23
+
24
+ describe(:printable) do
25
+ it 'matches space' do
26
+ expect(parser.printable).to parse(' ', trace: true)
27
+ end
28
+
29
+ it 'matches tab' do
30
+ expect(parser.printable).to parse("\t", trace: true)
31
+ end
32
+
33
+ it 'matches ASCII printable, except hyphens' do
34
+ aggregate_failures do
35
+ nonhyphens = printable_chars.reject { |x| x == '-' }
36
+ nonhyphens.each { |c| expect(parser.printable).to parse(c, trace: true) }
37
+ end
38
+ end
39
+
40
+ it 'does not match terminal hyphens' do
41
+ expect(parser.printable).not_to parse('-', trace: true)
42
+ end
43
+ end
44
+
45
+ describe(:text) do
46
+ it 'matches text' do
47
+ expect(parser.text).to parse('Undefined', trace: true)
48
+ end
49
+
50
+ it 'matches internal hyphens' do
51
+ txts = ['SERIES ADDED ENTRY--PERSONAL NAME (R)', 'Variable control fields (002-009)']
52
+ txts.each { |txt| expect(parser.text).to parse(txt, trace: true) }
53
+ end
54
+
55
+ it 'matches leading hyphens' do
56
+ expect(parser.text).to parse('--Series Added Entry Fields (80X-830)', trace: true)
57
+ end
58
+
59
+ it 'does not match terminal hyphens' do
60
+ expect(parser.text).not_to parse('Series Added Entry Fields (80X-830)--', trace: true)
61
+ end
62
+ end
63
+
64
+ describe(:ln_br) do
65
+ it 'matches a Windows line break' do
66
+ expect(parser.ln_br).to parse("\r\n", trace: true)
67
+ end
68
+
69
+ it 'matches Unicode newline characters' do
70
+ chars = [0xa, 0xb, 0xc, 0xd, 0x85, 0x2028, 0x2029].map { |cp| cp.chr(Encoding::UTF_8) }
71
+ aggregate_failures do
72
+ chars.each { |c| expect(parser.ln_br).to parse(c, trace: true) }
73
+ end
74
+ end
75
+ end
76
+
77
+ describe(:eol) do
78
+ it 'matches a newline' do
79
+ expect(parser.eol).to parse("\n", trace: true)
80
+ end
81
+
82
+ it 'matches a newline with leading whitespace' do
83
+ expect(parser.eol).to parse(" \t\n", trace: true)
84
+ end
85
+ end
86
+
87
+ describe(:text) do
88
+ it 'matches an ASCII printable sequence' do
89
+ expect(parser.text).to parse(printable_chars.join, trace: true)
90
+ end
91
+ end
92
+
93
+ describe(:comment) do
94
+ it 'matches a comment' do
95
+ expect(parser.comment).to parse('// this is a comment', trace: true)
96
+ end
97
+
98
+ it 'captures leading whitespace' do
99
+ expect(parser.comment).to parse(" \t// this is a comment", trace: true)
100
+ end
101
+ end
102
+
103
+ describe(:nc) do
104
+ it 'matches a comment' do
105
+ expect(parser.nc).to parse('// this is a comment', trace: true)
106
+ end
107
+
108
+ it 'matches a newline' do
109
+ expect(parser.nc).to parse("\n", trace: true)
110
+ end
111
+
112
+ it 'matches a block of comments and whitespace' do
113
+ block = <<~TXT
114
+
115
+ // this is a comment
116
+
117
+ // here is another comment
118
+ // and another
119
+
120
+ TXT
121
+
122
+ expect(parser.nc).to parse(block, trace: true)
123
+ end
124
+
125
+ it 'matches the empty string' do
126
+ expect(parser.nc).to parse('', trace: true)
127
+ end
128
+
129
+ it 'matches indented comments' do
130
+ block = <<~TXT
131
+ // this is a comment
132
+
133
+ // this is another comment
134
+ // so is this
135
+ TXT
136
+
137
+ expect(parser.nc).to parse(block, trace: true)
138
+ end
139
+ end
140
+
141
+ describe(:ind_def) do
142
+ it 'parses an indicator definition' do
143
+ ind_def = '0 - No added entry'
144
+ expect(parser.ind_def).to parse(ind_def, trace: true)
145
+ end
146
+
147
+ it 'parses an undefined indicator definition' do
148
+ ind_def = '# - Undefined'
149
+ expect(parser.ind_def).to parse(ind_def, trace: true)
150
+ end
151
+
152
+ it 'parses an indicator value range' do
153
+ ind_def = '0-9 - Number of nonfiling characters present'
154
+ expect(parser.ind_def).to parse(ind_def, trace: true)
155
+ end
156
+ end
157
+
158
+ describe(:indicators) do
159
+ it 'matches an empty indicator definition' do
160
+ ind_def = <<~TXT.strip
161
+ Indicators
162
+ First - Undefined
163
+ # - Undefined
164
+ Second - Undefined
165
+ # - Undefined
166
+ TXT
167
+
168
+ expect(parser.indicators).to parse(ind_def, trace: true)
169
+ end
170
+
171
+ it 'parses typical indicators' do
172
+ ind_def = <<~TXT.strip
173
+ Indicators
174
+ First - Access method
175
+ # - No information provided
176
+ 0 - Email
177
+ 1 - FTP
178
+ 2 - Remote login (Telnet)
179
+ 3 - Dial-up
180
+ 4 - HTTP
181
+ 7 - Method specified in subfield $2
182
+ Second - Relationship
183
+ # - No information provided
184
+ 0 - Resource
185
+ 1 - Version of resource
186
+ 2 - Related resource
187
+ 8 - No display constant generated
188
+ TXT
189
+
190
+ expect(parser.indicators).to parse(ind_def, trace: true)
191
+ end
192
+
193
+ it 'parses obsolete indicators' do
194
+ ind_def = <<~TXT.strip
195
+ Indicators
196
+ First - Government jurisdiction (BK MP MU VM)[OBSOLETE]
197
+ 0 - United States [OBSOLETE]
198
+ 1 - Canada [OBSOLETE] [CAN/MARC only]
199
+ 2 - France [OBSOLETE] [CAN/MARC only]
200
+ First - Undefined
201
+ # - Undefined
202
+ Second - Undefined [OBSOLETE]
203
+ # - Undefined [OBSOLETE]
204
+ Second - Display constant controller
205
+ # - Copyright or legal deposit number
206
+ 8 - No display constant controller generated
207
+ TXT
208
+
209
+ expect(parser.indicators).to parse(ind_def, trace: true)
210
+ end
211
+
212
+ it 'parses obsolete indicators with no values' do
213
+ ind_def = <<~TXT.strip
214
+ Indicators
215
+ First - Number source
216
+ # - Source specified in subfield $2
217
+ # - Undefined (BK MP MU VM SE) [OBSOLETE]
218
+ 0 - Superintendent of Documents Classification System
219
+ 1 - Government of Canada Publications: Outline of Classification
220
+ First - Government jurisdiction (BK MP MU VM SE) [OBSOLETE]
221
+ Second - Undefined
222
+ # - Undefined
223
+ TXT
224
+
225
+ expect(parser.indicators).to parse(ind_def, trace: true)
226
+ end
227
+
228
+ it 'parses value ranges' do
229
+ ind_def = <<~TXT.strip
230
+ Indicators
231
+ First - Nonfiling characters
232
+ 0-9 - Number of nonfiling characters present
233
+ # - Nonfiling characters not specified [OBSOLETE]
234
+ Second - Undefined
235
+ # - Undefined
236
+ Second - Main entry/subject relationship (BK MU SE) [OBSOLETE]
237
+ TXT
238
+
239
+ expect(parser.indicators).to parse(ind_def, trace: true)
240
+ end
241
+ end
242
+
243
+ describe(:subfield_value) do
244
+ it 'parses a single-character value' do
245
+ expect(parser.subfield_value).to parse('1 - Form of name', trace: true)
246
+ end
247
+
248
+ it 'parses a multi-character value' do
249
+ expect(parser.subfield_value).to parse('isds/c - ISSN Canada', trace: true)
250
+ end
251
+ end
252
+
253
+ describe(:subfield_def) do
254
+ it 'parses a subfield code definition without values' do
255
+ subfield_def = '$4 - Relationship (R)'
256
+ expect(parser.subfield_def).to parse(subfield_def, trace: true)
257
+ end
258
+
259
+ it 'parses a subfield code definition with values' do
260
+ subfield_def = <<~TXT.strip
261
+ $7 - Control subfield (NR)
262
+ 0 - Type of main entry heading
263
+ 1 - Form of name
264
+ TXT
265
+
266
+ expect(parser.subfield_def).to parse(subfield_def, trace: true)
267
+ end
268
+
269
+ it "doesn't consume next vf definition" do
270
+ non_def = <<~TXT.strip
271
+ $8 - Field link and sequence number (R)
272
+
273
+ 011 - LINKING LIBRARY OF CONGRESS CONTROL NUMBER (NR) [OBSOLETE]
274
+ TXT
275
+ expect(parser.subfield_def).not_to parse(non_def, trace: true)
276
+ end
277
+ end
278
+
279
+ describe(:subfield_codes) do
280
+ it 'parses a typical set of subfield codes' do
281
+ subfield_codes = <<~TXT.strip
282
+ Subfield Codes
283
+ $a - Replacement title (R)
284
+ $i - Explanatory text (R)
285
+ $w - Replacement bibliographic record control number (R)
286
+ $6 - Linkage (NR)
287
+ $8 - Field link and sequence number (R)
288
+ TXT
289
+
290
+ expect(parser.subfield_codes).to parse(subfield_codes, trace: true)
291
+ end
292
+
293
+ it 'parses a range of subfield codes' do
294
+ subfield_codes = <<~TXT.strip
295
+ Subfield Codes
296
+ $a - Tag of the foreign MARC field (NR)
297
+ $b - Content of the foreign MARC field (NR)
298
+ $2 - Source of data (NR)
299
+ $a-z - Foreign MARC subfield (R)
300
+ $0-9 - Foreign MARC subfield (R)
301
+ TXT
302
+
303
+ expect(parser.subfield_codes).to parse(subfield_codes, trace: true)
304
+ end
305
+
306
+ it 'parses a range of subfield codes with missing hyphen before desc' do
307
+ subfield_codes = <<~TXT.strip
308
+ Subfield Codes
309
+ $6 - Linkage (NR)
310
+ $a-z Same as associated field
311
+ $0-5 Same as associated field
312
+ TXT
313
+
314
+ expect(parser.subfield_codes).to parse(subfield_codes, trace: true)
315
+ end
316
+ end
317
+
318
+ describe(:ivc_def) do
319
+ it 'parses instrument or voices codes' do
320
+ ivc_def = <<~TXT.strip
321
+ Instrument or Voices Codes
322
+ ba - Brass - Horn
323
+ bb - Brass--Trumpet
324
+ bc - Brass--Cornet
325
+ TXT
326
+
327
+ expect(parser.ivc_def).to parse(ivc_def, trace: true)
328
+ end
329
+ end
330
+
331
+ describe(:vf) do
332
+ it 'parses a typical field' do
333
+ vf = <<~TXT.strip
334
+ 886 - FOREIGN MARC INFORMATION FIELD (R)
335
+ Indicators
336
+ First - Type of field
337
+ 0 - Leader
338
+ 1 - Variable control fields (002-009)
339
+ 2 - Variable data fields (010-999)
340
+ Second - Undefined
341
+ # - Undefined
342
+ Subfield Codes
343
+ $a - Tag of the foreign MARC field (NR)
344
+ $b - Content of the foreign MARC field (NR)
345
+ $2 - Source of data (NR)
346
+ $a-z - Foreign MARC subfield (R)
347
+ $0-9 - Foreign MARC subfield (R)
348
+ TXT
349
+
350
+ expect(parser.vf).to parse(vf, trace: true)
351
+ end
352
+
353
+ it 'parses a field with no indicators or subfields' do
354
+ vf = '863 - ENUMERATION AND CHRONOLOGY--BASIC BIBLIOGRAPHIC UNIT (R)'
355
+
356
+ expect(parser.vf).to parse(vf, trace: true)
357
+ end
358
+
359
+ it 'parsers a fields with Instrument or Voices Codes' do
360
+ vf = <<~TXT.strip
361
+ 048 - NUMBER OF MUSICAL INSTRUMENTS OR VOICES CODE (R)
362
+ Indicators
363
+ First - Undefined
364
+ # - Undefined
365
+ Second - Source of code
366
+ # - MARC code
367
+ 7 - Source specified in subfield $2
368
+ Subfield Codes
369
+ $a - Performer or ensemble (R)
370
+ $b - Soloist (R)
371
+ $2 - Source of code (NR)
372
+ $8 - Field link and sequence number (R)
373
+ Instrument or Voices Codes
374
+ ba - Brass - Horn
375
+ bb - Brass--Trumpet
376
+ TXT
377
+
378
+ expect(parser.vf).to parse(vf, trace: true)
379
+ end
380
+
381
+ it 'parses a TIND field with bogus indicators' do
382
+ vf = <<~TXT.strip
383
+ 909 - OAI REPOSITORY METADATA (NR)
384
+ Indicators
385
+ First - ???
386
+ C - ??? // Yes, this should be invalid; yes, TIND does it anyway
387
+ Second - ???
388
+ 0 - ???
389
+ Subfield Codes
390
+ $o - OAI URI for the record (oai:<hostname>:<record ID>) (NR)
391
+ $p - setSpec for current valid sets (NR)
392
+ $q - setSpec for previously valid sets (NR)
393
+ TXT
394
+
395
+ expect(parser.vf).to parse(vf, trace: true)
396
+ end
397
+ end
398
+
399
+ describe(:section_header) do
400
+ it 'matches a section header' do
401
+ expect(parser.section_header).to parse('--Number and Code Fields (01X-04X)--', trace: true)
402
+ end
403
+ end
404
+
405
+ describe(:section) do
406
+ it 'matches a section with header' do
407
+ section = <<~TXT.strip
408
+ --Number and Code Fields (01X-04X)--
409
+ 010 - LIBRARY OF CONGRESS CONTROL NUMBER (NR)
410
+ Indicators
411
+ First - Undefined
412
+ # - Undefined
413
+ Second - Undefined
414
+ # - Undefined
415
+ Subfield Codes
416
+ $a - LC control number (NR)
417
+ $b - NUCMC control number (R)
418
+ $z - Canceled/invalid LC control number (R)
419
+ $8 - Field link and sequence number (R)
420
+
421
+ 011 - LINKING LIBRARY OF CONGRESS CONTROL NUMBER (NR) [OBSOLETE]
422
+ Indicators
423
+ First - Undefined
424
+ # - Undefined
425
+ Second - Undefined
426
+ # - Undefined
427
+ Subfield Codes
428
+ $a - LINKING LC control number (R)
429
+ TXT
430
+
431
+ expect(parser.section).to parse(section, trace: true)
432
+ end
433
+
434
+ it 'matches a section without header' do
435
+ section = <<~TXT.strip
436
+ 852 - LOCATION (R)
437
+ Indicators
438
+ First - Undefined
439
+ # - Undefined
440
+ Second - Undefined
441
+ # - Undefined
442
+ Subfield Codes
443
+ $c - Location/Archive/Repository
444
+
445
+ 901 - IDENTIFIERS [INTERNAL] (NR) [REQUIRED WHEN APPLICABLE]
446
+ Indicators
447
+ First - Undefined
448
+ # - Undefined
449
+ Second - Undefined
450
+ # - Undefined
451
+ Subfield Codes
452
+ $a - Ark identifier for METS file [REQUIRED WHEN APPLICABLE]
453
+ $f - Ark identifier for finding aid
454
+ $g - PJID:DBID from GenDB [REQUIRED WHEN APPLICABLE]
455
+ $m - Millennium record number
456
+ $o - OCLC Number
457
+ TXT
458
+
459
+ expect(parser.section).to parse(section, trace: true)
460
+ end
461
+ end
462
+
463
+ describe(:list) do
464
+ it 'parses multiple sections' do
465
+ sections = <<~TXT.strip
466
+ --Number and Code Fields (01X-04X)--
467
+ 010 - LIBRARY OF CONGRESS CONTROL NUMBER (NR)
468
+ Indicators
469
+ First - Undefined
470
+ # - Undefined
471
+ Second - Undefined
472
+ # - Undefined
473
+ Subfield Codes
474
+ $a - LC control number (NR)
475
+ $8 - Field link and sequence number (R)
476
+
477
+ 048 - NUMBER OF MUSICAL INSTRUMENTS OR VOICES CODE (R)
478
+ Indicators
479
+ First - Undefined
480
+ # - Undefined
481
+ Second - Source of code
482
+ 7 - Source specified in subfield $2
483
+ Subfield Codes
484
+ $a - Performer or ensemble (R)
485
+ $8 - Field link and sequence number (R)
486
+ Instrument or Voices Codes
487
+ bc - Brass--Cornet
488
+ bd - Brass--Trombone
489
+
490
+
491
+ --Classification and Call Number Fields (05X-08X)--
492
+ 050 - LIBRARY OF CONGRESS CALL NUMBER (R)
493
+ Indicators
494
+ First - Existence in LC collection
495
+ 0 - Item is in LC
496
+ 1 - Item is not in LC
497
+ Second - Source of call number
498
+ 0 - Assigned by LC
499
+ 4 - Assigned by agency other than LC
500
+ Second - Series call number (SE) [OBSOLETE]
501
+ 0 - No series involved
502
+ 1 - Main series
503
+ Subfield Codes
504
+ $a - Classification number (R)
505
+ $b - Item number (NR)
506
+ TXT
507
+
508
+ expect(parser.list).to parse(sections, trace: true)
509
+ end
510
+
511
+ end
512
+
513
+ describe(:parse) do
514
+ it 'parses the standard list' do
515
+ list = File.read(VarFields::PATH_STANDARD)
516
+ expect(parser).to parse(list, trace: true)
517
+
518
+ parse_tree = parser.parse(list)
519
+ expect(parse_tree.keys).to contain_exactly(:doc_comments, :sections)
520
+
521
+ doc_comments = parse_tree[:doc_comments]
522
+ expected_comments = [
523
+ 'MARC 21 Format for Bibliographic Data Field List',
524
+ 'Adapted from https://www.loc.gov/marc/bibliographic/ecbdlist.html',
525
+ 'Retrieved 2021-01-13'
526
+ ]
527
+
528
+ expect(doc_comments.size).to eq(3)
529
+ expect(doc_comments.map { |c| c[:comment] }).to eq(expected_comments)
530
+
531
+ sections = parse_tree[:sections]
532
+ expected_sections = [
533
+ 'Number and Code Fields (01X-04X)',
534
+ 'Classification and Call Number Fields (05X-08X)',
535
+ 'Main Entry Fields (1XX)',
536
+ 'Title and Title-Related Fields (20X-24X)',
537
+ 'Edition, Imprint, etc. Fields 250-270',
538
+ 'Physical Description, etc. Fields (3XX)',
539
+ 'Series Statement Fields (4XX)',
540
+ 'Note Fields (Part 1: 50X-53X)',
541
+ 'Note Fields (Part 2: 53X-58X)',
542
+ '59X - LOCAL NOTES',
543
+ 'Subject Access Fields (6XX)',
544
+ 'Added Entry Fields (70X-75X)',
545
+ 'Linking Entry Fields (76X-78X)',
546
+ 'Series Added Entry Fields (80X-830)',
547
+ 'Holdings, Location, Alternate Graphics, etc. Fields (841-88X)'
548
+ ]
549
+ expect(sections.map { |s| s[:desc] }).to eq(expected_sections)
550
+ end
551
+
552
+ describe 'non-standard lists' do
553
+ data_dir = File.dirname(VarFields::PATH_STANDARD)
554
+ Dir.glob(File.expand_path('var_fields_*.txt', data_dir)).each do |p|
555
+ next if p == VarFields::PATH_STANDARD
556
+
557
+ basename = File.basename(p)
558
+ it "parses #{basename}" do
559
+ list = File.read(p)
560
+ expect(parser).to parse(list, trace: true)
561
+
562
+ # parse_tree = parser.parse(list)
563
+ # const_name = basename.sub(/^var_fields_/, '').sub(/\.txt/, '_parsed').upcase
564
+ # header = <<~HEADER
565
+ # module UCBLIT
566
+ # module Marc
567
+ # module FieldInfo
568
+ # module VarFields
569
+ # #{const_name} =
570
+ # HEADER
571
+ # footer = <<~FOOTER
572
+ # end
573
+ # end
574
+ # end
575
+ # end
576
+ # FOOTER
577
+ #
578
+ # parse_tree_src = StringIO.new.tap do |out|
579
+ # out.puts(header)
580
+ # PP.pp(parse_tree, out)
581
+ # out.puts(footer)
582
+ # end.string.gsub(/@[0-9]+/, '')
583
+ #
584
+ # parsed_file = p.sub(/\.txt/, '_parsed.rb')
585
+ # File.write(parsed_file, parse_tree_src)
586
+ end
587
+ end
588
+ end
589
+
590
+ end
591
+
592
+ end
593
+ end
594
+ end
595
+ end
596
+ end