pennmarc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +23 -0
  6. data/Gemfile.lock +119 -0
  7. data/README.md +82 -0
  8. data/legacy/indexer.rb +568 -0
  9. data/legacy/marc.rb +2964 -0
  10. data/legacy/test_file_output.json +49 -0
  11. data/lib/pennmarc/encoding_level.rb +43 -0
  12. data/lib/pennmarc/enriched_marc.rb +36 -0
  13. data/lib/pennmarc/heading_control.rb +11 -0
  14. data/lib/pennmarc/helpers/citation.rb +31 -0
  15. data/lib/pennmarc/helpers/creator.rb +237 -0
  16. data/lib/pennmarc/helpers/database.rb +89 -0
  17. data/lib/pennmarc/helpers/date.rb +85 -0
  18. data/lib/pennmarc/helpers/edition.rb +90 -0
  19. data/lib/pennmarc/helpers/format.rb +312 -0
  20. data/lib/pennmarc/helpers/genre.rb +71 -0
  21. data/lib/pennmarc/helpers/helper.rb +11 -0
  22. data/lib/pennmarc/helpers/identifier.rb +134 -0
  23. data/lib/pennmarc/helpers/language.rb +37 -0
  24. data/lib/pennmarc/helpers/link.rb +12 -0
  25. data/lib/pennmarc/helpers/location.rb +97 -0
  26. data/lib/pennmarc/helpers/note.rb +132 -0
  27. data/lib/pennmarc/helpers/production.rb +131 -0
  28. data/lib/pennmarc/helpers/relation.rb +135 -0
  29. data/lib/pennmarc/helpers/series.rb +118 -0
  30. data/lib/pennmarc/helpers/subject.rb +304 -0
  31. data/lib/pennmarc/helpers/title.rb +197 -0
  32. data/lib/pennmarc/mappings/language.yml +516 -0
  33. data/lib/pennmarc/mappings/locations.yml +1801 -0
  34. data/lib/pennmarc/mappings/relator.yml +263 -0
  35. data/lib/pennmarc/parser.rb +177 -0
  36. data/lib/pennmarc/util.rb +240 -0
  37. data/lib/pennmarc.rb +6 -0
  38. data/pennmarc.gemspec +22 -0
  39. data/spec/fixtures/marcxml/test.xml +167 -0
  40. data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
  41. data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
  42. data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
  43. data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
  44. data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
  45. data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
  46. data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
  47. data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
  48. data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
  49. data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
  50. data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
  51. data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
  52. data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
  53. data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
  54. data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
  55. data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
  56. data/spec/lib/pennmarc/parser_spec.rb +13 -0
  57. data/spec/spec_helper.rb +104 -0
  58. data/spec/support/marc_spec_helpers.rb +84 -0
  59. metadata +171 -0
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ $LOAD_PATH.unshift(__dir__) unless $LOAD_PATH.include?(__dir__)
4
+
5
+ Dir[File.join(__dir__, 'support', '**', '*.rb')].sort.each { |f| require f }
6
+
7
+ require 'pennmarc'
8
+
9
+ # This file was generated by the `rspec --init` command. Conventionally, all
10
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
11
+ # The generated `.rspec` file contains `--require spec_helper` which will cause
12
+ # this file to always be loaded, without a need to explicitly require it in any
13
+ # files.
14
+ #
15
+ # Given that it is always loaded, you are encouraged to keep this file as
16
+ # light-weight as possible. Requiring heavyweight dependencies from this file
17
+ # will add to the boot time of your test suite on EVERY test run, even for an
18
+ # individual file that may not need all of that loaded. Instead, consider making
19
+ # a separate helper file that requires the additional dependencies and performs
20
+ # the additional setup, and require it from the spec files that actually need
21
+ # it.
22
+ #
23
+ # See https://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
24
+ RSpec.configure do |config|
25
+ # rspec-expectations config goes here. You can use an alternate
26
+ # assertion/expectation library such as wrong or the stdlib/minitest
27
+ # assertions if you prefer.
28
+ config.expect_with :rspec do |expectations|
29
+ # This option will default to `true` in RSpec 4. It makes the `description`
30
+ # and `failure_message` of custom matchers include text for helper methods
31
+ # defined using `chain`, e.g.:
32
+ # be_bigger_than(2).and_smaller_than(4).description
33
+ # # => "be bigger than 2 and smaller than 4"
34
+ # ...rather than:
35
+ # # => "be bigger than 2"
36
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
37
+ end
38
+
39
+ # rspec-mocks config goes here. You can use an alternate test double
40
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
41
+ config.mock_with :rspec do |mocks|
42
+ # Prevents you from mocking or stubbing a method that does not exist on
43
+ # a real object. This is generally recommended, and will default to
44
+ # `true` in RSpec 4.
45
+ mocks.verify_partial_doubles = true
46
+ end
47
+
48
+ # This option will default to `:apply_to_host_groups` in RSpec 4 (and will
49
+ # have no way to turn it off -- the option exists only for backwards
50
+ # compatibility in RSpec 3). It causes shared context metadata to be
51
+ # inherited by the metadata hash of host groups and examples, rather than
52
+ # triggering implicit auto-inclusion in groups with matching metadata.
53
+ config.shared_context_metadata_behavior = :apply_to_host_groups
54
+
55
+ # The settings below are suggested to provide a good initial experience
56
+ # with RSpec, but feel free to customize to your heart's content.
57
+ # # This allows you to limit a spec run to individual examples or groups
58
+ # # you care about by tagging them with `:focus` metadata. When nothing
59
+ # # is tagged with `:focus`, all examples get run. RSpec also provides
60
+ # # aliases for `it`, `describe`, and `context` that include `:focus`
61
+ # # metadata: `fit`, `fdescribe` and `fcontext`, respectively.
62
+ # config.filter_run_when_matching :focus
63
+ #
64
+ # # Allows RSpec to persist some state between runs in order to support
65
+ # # the `--only-failures` and `--next-failure` CLI options. We recommend
66
+ # # you configure your source control system to ignore this file.
67
+ # config.example_status_persistence_file_path = "spec/examples.txt"
68
+ #
69
+ # # Limits the available syntax to the non-monkey patched syntax that is
70
+ # # recommended. For more details, see:
71
+ # # https://rspec.info/features/3-12/rspec-core/configuration/zero-monkey-patching-mode/
72
+ # config.disable_monkey_patching!
73
+ #
74
+ # # This setting enables warnings. It's recommended, but in some cases may
75
+ # # be too noisy due to issues in dependencies.
76
+ # config.warnings = true
77
+ #
78
+ # # Many RSpec users commonly either run the entire suite or an individual
79
+ # # file, and it's useful to allow more verbose output when running an
80
+ # # individual spec file.
81
+ # if config.files_to_run.one?
82
+ # # Use the documentation formatter for detailed output,
83
+ # # unless a formatter has already been configured
84
+ # # (e.g. via a command-line flag).
85
+ # config.default_formatter = "doc"
86
+ # end
87
+ #
88
+ # # Print the 10 slowest examples and example groups at the
89
+ # # end of the spec run, to help surface which specs are running
90
+ # # particularly slow.
91
+ # config.profile_examples = 10
92
+ #
93
+ # # Run specs in random order to surface order dependencies. If you find an
94
+ # # order dependency and want to debug it, you can fix the order by providing
95
+ # # the seed, which is printed after each run.
96
+ # # --seed 1234
97
+ # config.order = :random
98
+ #
99
+ # # Seed global randomization in this process using the `--seed` CLI option.
100
+ # # Setting this allows you to use `--seed` to deterministically reproduce
101
+ # # test failures related to randomization by passing the same `--seed` value
102
+ # # as the one that triggered the failure.
103
+ # Kernel.srand config.seed
104
+ end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require 'marc'
5
+
6
+ module MarcSpecHelpers
7
+ # Parse the given MARCXML fixture file with MARC::XMLReader and return the first MARC::Record it yields
8
+ # @param [String] filename of MARCXML fixture
9
+ # @return [MARC::Record, NilClass]
10
+ def record_from(filename)
11
+ MARC::XMLReader.new(marc_xml_path(filename)).first
12
+ end
13
+
14
+ # Get the path for a test MARC XML file
15
+ # @param [String] filename of MARCXML fixture
16
+ # @return [String] full path of MARCXML fixture
17
+ def marc_xml_path(filename)
18
+ File.join File.dirname(__FILE__), '..', 'fixtures', 'marcxml', filename
19
+ end
20
+
21
+ # Create an isolated MARC::Subfield object for use in specs or as part of a MARC::Field
22
+ # @param [String, Symbol] code subfield code; converted with #to_s before use
23
+ # @param [String] value
24
+ # @return [MARC::Subfield]
25
+ def marc_subfield(code, value)
26
+ MARC::Subfield.new code.to_s, value
27
+ end
28
+
29
+ # Return a new ControlField (000-009)
30
+ # @param [String] tag
31
+ # @param [String] value
32
+ # @return [MARC::ControlField]
33
+ def marc_control_field(tag:, value:)
34
+ MARC::ControlField.new tag, value
35
+ end
36
+
37
+ # Create an isolated MARC::DataField object for use in specs
38
+ # Can pass in tag, indicators and subfields (using simple hash structure). E.g.,
39
+ # marc_field(tag: '650', indicator2: '7',
40
+ # subfields: { a: 'Tax planning',
41
+ # m: ['Multiple', 'Subfields'],
42
+ # z: 'United States.',
43
+ # '0': 'http://id.loc.gov/authorities/subjects/sh2008112546' }
44
+ # )
45
+ # @param [String (frozen)] tag MARC tag, e.g., 001, 665
46
+ # @param [String (frozen)] indicator1 MARC indicator, e.g., 0
47
+ # @param [String (frozen)] indicator2
48
+ # @param [Hash] subfields hash of subfield values as code => value or code => [value, value]
49
+ # @return [MARC::DataField]
50
+ def marc_field(tag: 'TST', indicator1: ' ', indicator2: ' ', subfields: {})
51
+ subfield_objects = subfields.each_with_object([]) do |(code, value), array|
52
+ Array.wrap(value).map { |v| array << marc_subfield(code, v) }
53
+ end
54
+ MARC::DataField.new tag, indicator1, indicator2, *subfield_objects
55
+ end
56
+
57
+ # Return a MARC::Record containing passed in DataFields
58
+ # @param [Array<MARC::DataField>] fields
59
+ # @param [String, nil] leader
60
+ # @return [MARC::Record]
61
+ def marc_record(fields: [], leader: nil)
62
+ record = MARC::Record.new
63
+ fields.each { |field| record << field }
64
+ record.leader = leader if leader
65
+ record
66
+ end
67
+
68
+ # Mock map for location lookup using Location helper
69
+ # The location codes :dent and :stor are the two outermost keys
70
+ # :specific_location, :library, :display are the inner keys that store location values
71
+ # @example
72
+ # location_map[:stor][:library] #=> 'LIBRA'
73
+ # @return [Hash]
74
+ def location_map
75
+ {
76
+ dent: { specific_location: 'Levy Dental Medicine Library - Stacks',
77
+ library: ['Health Sciences Libraries', 'Levy Dental Medicine Library'],
78
+ display: 'Levy Dental Medicine Library - Stacks' },
79
+ stor: { specific_location: 'LIBRA',
80
+ library: 'LIBRA',
81
+ display: 'LIBRA' }
82
+ }
83
+ end
84
+ end
metadata ADDED
@@ -0,0 +1,171 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pennmarc
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Mike Kanning
8
+ - Amrey Mathurin
9
+ - Patrick Perkins
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+ date: 2023-07-17 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: marc
17
+ requirement: !ruby/object:Gem::Requirement
18
+ requirements:
19
+ - - ">="
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ version: '0'
29
+ - !ruby/object:Gem::Dependency
30
+ name: nokogiri
31
+ requirement: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - ">="
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ type: :development
37
+ prerelease: false
38
+ version_requirements: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: '0'
43
+ - !ruby/object:Gem::Dependency
44
+ name: rspec
45
+ requirement: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ type: :development
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ - !ruby/object:Gem::Dependency
58
+ name: simplecov
59
+ requirement: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ type: :development
65
+ prerelease: false
66
+ version_requirements: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ - !ruby/object:Gem::Dependency
72
+ name: upennlib-rubocop
73
+ requirement: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ type: :development
79
+ prerelease: false
80
+ version_requirements: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ description: Penn Libraries Catalog MARC parsing wisdom for cross-project usage
86
+ email: mkanning@upenn.edu
87
+ executables: []
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - ".gitignore"
92
+ - ".rspec"
93
+ - ".ruby-version"
94
+ - Gemfile
95
+ - Gemfile.lock
96
+ - README.md
97
+ - legacy/indexer.rb
98
+ - legacy/marc.rb
99
+ - legacy/test_file_output.json
100
+ - lib/pennmarc.rb
101
+ - lib/pennmarc/encoding_level.rb
102
+ - lib/pennmarc/enriched_marc.rb
103
+ - lib/pennmarc/heading_control.rb
104
+ - lib/pennmarc/helpers/citation.rb
105
+ - lib/pennmarc/helpers/creator.rb
106
+ - lib/pennmarc/helpers/database.rb
107
+ - lib/pennmarc/helpers/date.rb
108
+ - lib/pennmarc/helpers/edition.rb
109
+ - lib/pennmarc/helpers/format.rb
110
+ - lib/pennmarc/helpers/genre.rb
111
+ - lib/pennmarc/helpers/helper.rb
112
+ - lib/pennmarc/helpers/identifier.rb
113
+ - lib/pennmarc/helpers/language.rb
114
+ - lib/pennmarc/helpers/link.rb
115
+ - lib/pennmarc/helpers/location.rb
116
+ - lib/pennmarc/helpers/note.rb
117
+ - lib/pennmarc/helpers/production.rb
118
+ - lib/pennmarc/helpers/relation.rb
119
+ - lib/pennmarc/helpers/series.rb
120
+ - lib/pennmarc/helpers/subject.rb
121
+ - lib/pennmarc/helpers/title.rb
122
+ - lib/pennmarc/mappings/language.yml
123
+ - lib/pennmarc/mappings/locations.yml
124
+ - lib/pennmarc/mappings/relator.yml
125
+ - lib/pennmarc/parser.rb
126
+ - lib/pennmarc/util.rb
127
+ - pennmarc.gemspec
128
+ - spec/fixtures/marcxml/test.xml
129
+ - spec/lib/pennmarc/helpers/citation_spec.rb
130
+ - spec/lib/pennmarc/helpers/creator_spec.rb
131
+ - spec/lib/pennmarc/helpers/database_spec.rb
132
+ - spec/lib/pennmarc/helpers/date_spec.rb
133
+ - spec/lib/pennmarc/helpers/edition_spec.rb
134
+ - spec/lib/pennmarc/helpers/format_spec.rb
135
+ - spec/lib/pennmarc/helpers/genre_spec.rb
136
+ - spec/lib/pennmarc/helpers/identifer_spec.rb
137
+ - spec/lib/pennmarc/helpers/language_spec.rb
138
+ - spec/lib/pennmarc/helpers/location_spec.rb
139
+ - spec/lib/pennmarc/helpers/note_spec.rb
140
+ - spec/lib/pennmarc/helpers/production_spec.rb
141
+ - spec/lib/pennmarc/helpers/relation_spec.rb
142
+ - spec/lib/pennmarc/helpers/subject_spec.rb
143
+ - spec/lib/pennmarc/helpers/title_spec.rb
144
+ - spec/lib/pennmarc/marc_util_spec.rb
145
+ - spec/lib/pennmarc/parser_spec.rb
146
+ - spec/spec_helper.rb
147
+ - spec/support/marc_spec_helpers.rb
148
+ homepage: https://gitlab.library.upenn.edu/dld/catalog/pennmarc
149
+ licenses:
150
+ - MIT
151
+ metadata: {}
152
+ post_install_message:
153
+ rdoc_options: []
154
+ require_paths:
155
+ - lib
156
+ required_ruby_version: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - ">="
159
+ - !ruby/object:Gem::Version
160
+ version: '3.2'
161
+ required_rubygems_version: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - ">="
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
166
+ requirements: []
167
+ rubygems_version: 3.4.10
168
+ signing_key:
169
+ specification_version: 4
170
+ summary: Penn Libraries Catalog MARC parsing wisdom for cross-project usage
171
+ test_files: []