pennmarc 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +23 -0
  6. data/Gemfile.lock +119 -0
  7. data/README.md +82 -0
  8. data/legacy/indexer.rb +568 -0
  9. data/legacy/marc.rb +2964 -0
  10. data/legacy/test_file_output.json +49 -0
  11. data/lib/pennmarc/encoding_level.rb +43 -0
  12. data/lib/pennmarc/enriched_marc.rb +36 -0
  13. data/lib/pennmarc/heading_control.rb +11 -0
  14. data/lib/pennmarc/helpers/citation.rb +31 -0
  15. data/lib/pennmarc/helpers/creator.rb +237 -0
  16. data/lib/pennmarc/helpers/database.rb +89 -0
  17. data/lib/pennmarc/helpers/date.rb +85 -0
  18. data/lib/pennmarc/helpers/edition.rb +90 -0
  19. data/lib/pennmarc/helpers/format.rb +312 -0
  20. data/lib/pennmarc/helpers/genre.rb +71 -0
  21. data/lib/pennmarc/helpers/helper.rb +11 -0
  22. data/lib/pennmarc/helpers/identifier.rb +134 -0
  23. data/lib/pennmarc/helpers/language.rb +37 -0
  24. data/lib/pennmarc/helpers/link.rb +12 -0
  25. data/lib/pennmarc/helpers/location.rb +97 -0
  26. data/lib/pennmarc/helpers/note.rb +132 -0
  27. data/lib/pennmarc/helpers/production.rb +131 -0
  28. data/lib/pennmarc/helpers/relation.rb +135 -0
  29. data/lib/pennmarc/helpers/series.rb +118 -0
  30. data/lib/pennmarc/helpers/subject.rb +304 -0
  31. data/lib/pennmarc/helpers/title.rb +197 -0
  32. data/lib/pennmarc/mappings/language.yml +516 -0
  33. data/lib/pennmarc/mappings/locations.yml +1801 -0
  34. data/lib/pennmarc/mappings/relator.yml +263 -0
  35. data/lib/pennmarc/parser.rb +177 -0
  36. data/lib/pennmarc/util.rb +240 -0
  37. data/lib/pennmarc.rb +6 -0
  38. data/pennmarc.gemspec +22 -0
  39. data/spec/fixtures/marcxml/test.xml +167 -0
  40. data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
  41. data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
  42. data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
  43. data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
  44. data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
  45. data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
  46. data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
  47. data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
  48. data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
  49. data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
  50. data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
  51. data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
  52. data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
  53. data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
  54. data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
  55. data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
  56. data/spec/lib/pennmarc/parser_spec.rb +13 -0
  57. data/spec/spec_helper.rb +104 -0
  58. data/spec/support/marc_spec_helpers.rb +84 -0
  59. metadata +171 -0
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ $LOAD_PATH.unshift(__dir__) unless $LOAD_PATH.include?(__dir__)
4
+
5
+ Dir[File.join(__dir__, 'support', '**', '*.rb')].sort.each { |f| require f }
6
+
7
+ require 'pennmarc'
8
+
9
+ # This file was generated by the `rspec --init` command. Conventionally, all
10
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
11
+ # The generated `.rspec` file contains `--require spec_helper` which will cause
12
+ # this file to always be loaded, without a need to explicitly require it in any
13
+ # files.
14
+ #
15
+ # Given that it is always loaded, you are encouraged to keep this file as
16
+ # light-weight as possible. Requiring heavyweight dependencies from this file
17
+ # will add to the boot time of your test suite on EVERY test run, even for an
18
+ # individual file that may not need all of that loaded. Instead, consider making
19
+ # a separate helper file that requires the additional dependencies and performs
20
+ # the additional setup, and require it from the spec files that actually need
21
+ # it.
22
+ #
23
+ # See https://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
24
RSpec.configure do |config|
  # rspec-expectations settings. An alternate assertion/expectation library
  # (such as wrong or the stdlib/minitest assertions) could be selected here
  # instead.
  config.expect_with(:rspec) do |matchers|
    # Will default to `true` in RSpec 4. Makes the `description` and
    # `failure_message` of custom matchers include the text of helper methods
    # defined using `chain`, e.g.:
    #     be_bigger_than(2).and_smaller_than(4).description
    #     # => "be bigger than 2 and smaller than 4"
    # ...rather than:
    #     # => "be bigger than 2"
    matchers.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  # rspec-mocks settings. An alternate test double library (such as bogus or
  # mocha) could be selected by changing the `mock_with` option.
  config.mock_with(:rspec) do |doubles|
    # Refuse to mock or stub a method that does not exist on a real object.
    # Generally recommended; will default to `true` in RSpec 4.
    doubles.verify_partial_doubles = true
  end

  # Will default to `:apply_to_host_groups` in RSpec 4 (with no way to turn it
  # off -- the option exists only for backwards compatibility in RSpec 3).
  # Shared context metadata is inherited by the metadata hash of host groups
  # and examples, rather than triggering implicit auto-inclusion in groups
  # with matching metadata.
  config.shared_context_metadata_behavior = :apply_to_host_groups

  # The settings below are suggested by RSpec for a good initial experience.
  # They are all currently disabled; uncomment the ones you want.
  #
  #   # Limit a spec run to individual examples or groups by tagging them with
  #   # `:focus` metadata. When nothing is tagged with `:focus`, all examples
  #   # get run. RSpec also provides aliases for `it`, `describe`, and
  #   # `context` that include `:focus` metadata: `fit`, `fdescribe` and
  #   # `fcontext`, respectively.
  #   config.filter_run_when_matching :focus
  #
  #   # Persist some state between runs in order to support the
  #   # `--only-failures` and `--next-failure` CLI options. Configure your
  #   # source control system to ignore this file.
  #   config.example_status_persistence_file_path = "spec/examples.txt"
  #
  #   # Limit the available syntax to the recommended non-monkey-patched
  #   # syntax. For more details, see:
  #   # https://rspec.info/features/3-12/rspec-core/configuration/zero-monkey-patching-mode/
  #   config.disable_monkey_patching!
  #
  #   # Enable warnings. Recommended, but in some cases may be too noisy due
  #   # to issues in dependencies.
  #   config.warnings = true
  #
  #   # Use more verbose output when running an individual spec file.
  #   if config.files_to_run.one?
  #     # Use the documentation formatter for detailed output, unless a
  #     # formatter has already been configured (e.g. via a command-line flag).
  #     config.default_formatter = "doc"
  #   end
  #
  #   # Print the 10 slowest examples and example groups at the end of the
  #   # spec run, to help surface which specs are running particularly slow.
  #   config.profile_examples = 10
  #
  #   # Run specs in random order to surface order dependencies. To debug an
  #   # order dependency, fix the order by providing the seed, which is
  #   # printed after each run.
  #   #     --seed 1234
  #   config.order = :random
  #
  #   # Seed global randomization in this process using the `--seed` CLI
  #   # option, so that test failures related to randomization can be
  #   # reproduced deterministically by passing the same `--seed` value as the
  #   # one that triggered the failure.
  #   Kernel.srand config.seed
end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'nokogiri'
4
+ require 'marc'
5
+
6
# Helper methods for building MARC objects and loading MARCXML fixtures in specs.
module MarcSpecHelpers
  # Parse a MARCXML fixture file and return the first record read from it
  # @param [String] filename of MARCXML fixture
  # @return [MARC::Record, NilClass]
  def record_from(filename)
    MARC::XMLReader.new(marc_xml_path(filename)).first
  end

  # Get the path for a test MARC XML file. The path is resolved relative to the
  # directory containing this file (../fixtures/marcxml) and normalised.
  # @param [String] filename of MARCXML fixture
  # @return [String] full path of MARCXML fixture
  def marc_xml_path(filename)
    File.expand_path File.join('..', 'fixtures', 'marcxml', filename), __dir__
  end

  # Create an isolated MARC::Subfield object for use in specs or as part of a MARC::Field
  # @param [String, Symbol] code converted to a String before use
  # @param [String] value
  # @return [MARC::Subfield]
  def marc_subfield(code, value)
    MARC::Subfield.new code.to_s, value
  end

  # Return a new ControlField (000-009)
  # @param [String] tag
  # @param [String] value
  # @return [MARC::ControlField]
  def marc_control_field(tag:, value:)
    MARC::ControlField.new tag, value
  end

  # Create an isolated MARC::DataField object for use in specs
  # Can pass in tag, indicators and subfields (using simple hash structure). E.g.,
  #   marc_field(tag: '650', indicator2: '7',
  #              subfields: { a: 'Tax planning',
  #                           m: ['Multiple', 'Subfields'],
  #                           z: 'United States.',
  #                           '0': 'http://id.loc.gov/authorities/subjects/sh2008112546' })
  # @param [String (frozen)] tag MARC tag, e.g., 245, 650
  # @param [String (frozen)] indicator1 MARC indicator, e.g., 0
  # @param [String (frozen)] indicator2
  # @param [Hash] subfields hash of subfield values as code => value or code => [value, value]
  # @return [MARC::DataField]
  def marc_field(tag: 'TST', indicator1: ' ', indicator2: ' ', subfields: {})
    # Kernel#Array (core Ruby) turns a single value into a one-element array, leaves an
    # array untouched and turns nil into [], so no ActiveSupport (Array.wrap) is needed.
    subfield_objects = subfields.flat_map do |code, value|
      Array(value).map { |v| marc_subfield(code, v) }
    end
    MARC::DataField.new tag, indicator1, indicator2, *subfield_objects
  end

  # Return a MARC::Record containing passed in DataFields
  # @param [Array<MARC::DataField>] fields
  # @param [String, nil] leader only assigned when given
  # @return [MARC::Record]
  def marc_record(fields: [], leader: nil)
    record = MARC::Record.new
    fields.each { |field| record << field }
    record.leader = leader if leader
    record
  end

  # Mock map for location lookup using Location helper
  # The location codes :dent and :stor are the two outermost keys
  # :specific_location, :library, :display are the inner keys that store location values
  # @example
  #   location_map[:stor][:library] #=> 'LIBRA'
  # @return [Hash]
  def location_map
    {
      dent: { specific_location: 'Levy Dental Medicine Library - Stacks',
              library: ['Health Sciences Libraries', 'Levy Dental Medicine Library'],
              display: 'Levy Dental Medicine Library - Stacks' },
      stor: { specific_location: 'LIBRA',
              library: 'LIBRA',
              display: 'LIBRA' }
    }
  end
end
metadata ADDED
@@ -0,0 +1,171 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pennmarc
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Mike Kanning
8
+ - Amrey Mathurin
9
+ - Patrick Perkins
10
+ autorequire:
11
+ bindir: bin
12
+ cert_chain: []
13
+ date: 2023-07-17 00:00:00.000000000 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: marc
17
+ requirement: !ruby/object:Gem::Requirement
18
+ requirements:
19
+ - - ">="
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ requirements:
26
+ - - ">="
27
+ - !ruby/object:Gem::Version
28
+ version: '0'
29
+ - !ruby/object:Gem::Dependency
30
+ name: nokogiri
31
+ requirement: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - ">="
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ type: :development
37
+ prerelease: false
38
+ version_requirements: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: '0'
43
+ - !ruby/object:Gem::Dependency
44
+ name: rspec
45
+ requirement: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ type: :development
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ - !ruby/object:Gem::Dependency
58
+ name: simplecov
59
+ requirement: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ type: :development
65
+ prerelease: false
66
+ version_requirements: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ - !ruby/object:Gem::Dependency
72
+ name: upennlib-rubocop
73
+ requirement: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ type: :development
79
+ prerelease: false
80
+ version_requirements: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ description: Penn Libraries Catalog MARC parsing wisdom for cross-project usage
86
+ email: mkanning@upenn.edu
87
+ executables: []
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - ".gitignore"
92
+ - ".rspec"
93
+ - ".ruby-version"
94
+ - Gemfile
95
+ - Gemfile.lock
96
+ - README.md
97
+ - legacy/indexer.rb
98
+ - legacy/marc.rb
99
+ - legacy/test_file_output.json
100
+ - lib/pennmarc.rb
101
+ - lib/pennmarc/encoding_level.rb
102
+ - lib/pennmarc/enriched_marc.rb
103
+ - lib/pennmarc/heading_control.rb
104
+ - lib/pennmarc/helpers/citation.rb
105
+ - lib/pennmarc/helpers/creator.rb
106
+ - lib/pennmarc/helpers/database.rb
107
+ - lib/pennmarc/helpers/date.rb
108
+ - lib/pennmarc/helpers/edition.rb
109
+ - lib/pennmarc/helpers/format.rb
110
+ - lib/pennmarc/helpers/genre.rb
111
+ - lib/pennmarc/helpers/helper.rb
112
+ - lib/pennmarc/helpers/identifier.rb
113
+ - lib/pennmarc/helpers/language.rb
114
+ - lib/pennmarc/helpers/link.rb
115
+ - lib/pennmarc/helpers/location.rb
116
+ - lib/pennmarc/helpers/note.rb
117
+ - lib/pennmarc/helpers/production.rb
118
+ - lib/pennmarc/helpers/relation.rb
119
+ - lib/pennmarc/helpers/series.rb
120
+ - lib/pennmarc/helpers/subject.rb
121
+ - lib/pennmarc/helpers/title.rb
122
+ - lib/pennmarc/mappings/language.yml
123
+ - lib/pennmarc/mappings/locations.yml
124
+ - lib/pennmarc/mappings/relator.yml
125
+ - lib/pennmarc/parser.rb
126
+ - lib/pennmarc/util.rb
127
+ - pennmarc.gemspec
128
+ - spec/fixtures/marcxml/test.xml
129
+ - spec/lib/pennmarc/helpers/citation_spec.rb
130
+ - spec/lib/pennmarc/helpers/creator_spec.rb
131
+ - spec/lib/pennmarc/helpers/database_spec.rb
132
+ - spec/lib/pennmarc/helpers/date_spec.rb
133
+ - spec/lib/pennmarc/helpers/edition_spec.rb
134
+ - spec/lib/pennmarc/helpers/format_spec.rb
135
+ - spec/lib/pennmarc/helpers/genre_spec.rb
136
+ - spec/lib/pennmarc/helpers/identifer_spec.rb
137
+ - spec/lib/pennmarc/helpers/language_spec.rb
138
+ - spec/lib/pennmarc/helpers/location_spec.rb
139
+ - spec/lib/pennmarc/helpers/note_spec.rb
140
+ - spec/lib/pennmarc/helpers/production_spec.rb
141
+ - spec/lib/pennmarc/helpers/relation_spec.rb
142
+ - spec/lib/pennmarc/helpers/subject_spec.rb
143
+ - spec/lib/pennmarc/helpers/title_spec.rb
144
+ - spec/lib/pennmarc/marc_util_spec.rb
145
+ - spec/lib/pennmarc/parser_spec.rb
146
+ - spec/spec_helper.rb
147
+ - spec/support/marc_spec_helpers.rb
148
+ homepage: https://gitlab.library.upenn.edu/dld/catalog/pennmarc
149
+ licenses:
150
+ - MIT
151
+ metadata: {}
152
+ post_install_message:
153
+ rdoc_options: []
154
+ require_paths:
155
+ - lib
156
+ required_ruby_version: !ruby/object:Gem::Requirement
157
+ requirements:
158
+ - - ">="
159
+ - !ruby/object:Gem::Version
160
+ version: '3.2'
161
+ required_rubygems_version: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - ">="
164
+ - !ruby/object:Gem::Version
165
+ version: '0'
166
+ requirements: []
167
+ rubygems_version: 3.4.10
168
+ signing_key:
169
+ specification_version: 4
170
+ summary: Penn Libraries Catalog MARC parsing wisdom for cross-project usage
171
+ test_files: []