pennmarc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +23 -0
  6. data/Gemfile.lock +119 -0
  7. data/README.md +82 -0
  8. data/legacy/indexer.rb +568 -0
  9. data/legacy/marc.rb +2964 -0
  10. data/legacy/test_file_output.json +49 -0
  11. data/lib/pennmarc/encoding_level.rb +43 -0
  12. data/lib/pennmarc/enriched_marc.rb +36 -0
  13. data/lib/pennmarc/heading_control.rb +11 -0
  14. data/lib/pennmarc/helpers/citation.rb +31 -0
  15. data/lib/pennmarc/helpers/creator.rb +237 -0
  16. data/lib/pennmarc/helpers/database.rb +89 -0
  17. data/lib/pennmarc/helpers/date.rb +85 -0
  18. data/lib/pennmarc/helpers/edition.rb +90 -0
  19. data/lib/pennmarc/helpers/format.rb +312 -0
  20. data/lib/pennmarc/helpers/genre.rb +71 -0
  21. data/lib/pennmarc/helpers/helper.rb +11 -0
  22. data/lib/pennmarc/helpers/identifier.rb +134 -0
  23. data/lib/pennmarc/helpers/language.rb +37 -0
  24. data/lib/pennmarc/helpers/link.rb +12 -0
  25. data/lib/pennmarc/helpers/location.rb +97 -0
  26. data/lib/pennmarc/helpers/note.rb +132 -0
  27. data/lib/pennmarc/helpers/production.rb +131 -0
  28. data/lib/pennmarc/helpers/relation.rb +135 -0
  29. data/lib/pennmarc/helpers/series.rb +118 -0
  30. data/lib/pennmarc/helpers/subject.rb +304 -0
  31. data/lib/pennmarc/helpers/title.rb +197 -0
  32. data/lib/pennmarc/mappings/language.yml +516 -0
  33. data/lib/pennmarc/mappings/locations.yml +1801 -0
  34. data/lib/pennmarc/mappings/relator.yml +263 -0
  35. data/lib/pennmarc/parser.rb +177 -0
  36. data/lib/pennmarc/util.rb +240 -0
  37. data/lib/pennmarc.rb +6 -0
  38. data/pennmarc.gemspec +22 -0
  39. data/spec/fixtures/marcxml/test.xml +167 -0
  40. data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
  41. data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
  42. data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
  43. data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
  44. data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
  45. data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
  46. data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
  47. data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
  48. data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
  49. data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
  50. data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
  51. data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
  52. data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
  53. data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
  54. data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
  55. data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
  56. data/spec/lib/pennmarc/parser_spec.rb +13 -0
  57. data/spec/spec_helper.rb +104 -0
  58. data/spec/support/marc_spec_helpers.rb +84 -0
  59. metadata +171 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 5480cac8bd9e0c8ae5fe096945f7dccb9c56730ee33f9fa35cee0aaee202fa6b
4
+ data.tar.gz: 38c4aa42f396b061cc7813320cc714081b85563fc9bf2f689588887018610863
5
+ SHA512:
6
+ metadata.gz: e577b914ce87e01bdc5ddd95680b7e547398d40c12bda4f52ebf258abd80882b87bd53948dd306a3fa82935694eb57f0196c86819f602f5049d8932a5a8529e3
7
+ data.tar.gz: ede363f6fffbd352cd8cdb6ad97290f97a1bd14f7665788a82303c75625244d78d2ef301497d3892ff2c886e6c04e0fa8fd1685d6bfdf2bd78486155c79da117
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+
2
+ # Ignore YARD products
3
+ .yardoc
4
+ doc/
5
+
6
+ *.gem
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 3.2.2
data/Gemfile ADDED
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ gem 'activesupport', '~> 7'
6
+ gem 'library_stdnums', '~> 1.6'
7
+ gem 'marc', '~> 1.2'
8
+ gem 'nokogiri', '~> 1.15'
9
+ gem 'rake', '~> 13.0'
10
+
11
+ group :test, :development do
12
+ gem 'rspec', '~> 3.12'
13
+ end
14
+
15
+ group :test do
16
+ gem 'simplecov', '~> 0.22'
17
+ end
18
+
19
+ group :development do
20
+ gem 'upennlib-rubocop', require: false
21
+ gem 'webrick', '~> 1.8'
22
+ gem 'yard', '~> 0.9'
23
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,119 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ activesupport (7.0.5)
5
+ concurrent-ruby (~> 1.0, >= 1.0.2)
6
+ i18n (>= 1.6, < 2)
7
+ minitest (>= 5.1)
8
+ tzinfo (~> 2.0)
9
+ ast (2.4.2)
10
+ concurrent-ruby (1.2.2)
11
+ diff-lcs (1.5.0)
12
+ docile (1.4.0)
13
+ i18n (1.13.0)
14
+ concurrent-ruby (~> 1.0)
15
+ json (2.6.3)
16
+ library_stdnums (1.6.0)
17
+ marc (1.2.0)
18
+ rexml
19
+ scrub_rb (>= 1.0.1, < 2)
20
+ unf
21
+ minitest (5.18.0)
22
+ nokogiri (1.15.2-arm64-darwin)
23
+ racc (~> 1.4)
24
+ nokogiri (1.15.2-x86_64-linux)
25
+ racc (~> 1.4)
26
+ parallel (1.23.0)
27
+ parser (3.2.2.1)
28
+ ast (~> 2.4.1)
29
+ racc (1.6.2)
30
+ rack (3.0.7)
31
+ rainbow (3.1.1)
32
+ rake (13.0.6)
33
+ regexp_parser (2.8.0)
34
+ rexml (3.2.5)
35
+ rspec (3.12.0)
36
+ rspec-core (~> 3.12.0)
37
+ rspec-expectations (~> 3.12.0)
38
+ rspec-mocks (~> 3.12.0)
39
+ rspec-core (3.12.2)
40
+ rspec-support (~> 3.12.0)
41
+ rspec-expectations (3.12.3)
42
+ diff-lcs (>= 1.2.0, < 2.0)
43
+ rspec-support (~> 3.12.0)
44
+ rspec-mocks (3.12.5)
45
+ diff-lcs (>= 1.2.0, < 2.0)
46
+ rspec-support (~> 3.12.0)
47
+ rspec-support (3.12.0)
48
+ rubocop (1.51.0)
49
+ json (~> 2.3)
50
+ parallel (~> 1.10)
51
+ parser (>= 3.2.0.0)
52
+ rainbow (>= 2.2.2, < 4.0)
53
+ regexp_parser (>= 1.8, < 3.0)
54
+ rexml (>= 3.2.5, < 4.0)
55
+ rubocop-ast (>= 1.28.0, < 2.0)
56
+ ruby-progressbar (~> 1.7)
57
+ unicode-display_width (>= 2.4.0, < 3.0)
58
+ rubocop-ast (1.28.1)
59
+ parser (>= 3.2.1.0)
60
+ rubocop-capybara (2.18.0)
61
+ rubocop (~> 1.41)
62
+ rubocop-factory_bot (2.23.1)
63
+ rubocop (~> 1.33)
64
+ rubocop-performance (1.18.0)
65
+ rubocop (>= 1.7.0, < 2.0)
66
+ rubocop-ast (>= 0.4.0)
67
+ rubocop-rails (2.19.1)
68
+ activesupport (>= 4.2.0)
69
+ rack (>= 1.1)
70
+ rubocop (>= 1.33.0, < 2.0)
71
+ rubocop-rake (0.6.0)
72
+ rubocop (~> 1.0)
73
+ rubocop-rspec (2.22.0)
74
+ rubocop (~> 1.33)
75
+ rubocop-capybara (~> 2.17)
76
+ rubocop-factory_bot (~> 2.22)
77
+ ruby-progressbar (1.13.0)
78
+ scrub_rb (1.0.1)
79
+ simplecov (0.22.0)
80
+ docile (~> 1.1)
81
+ simplecov-html (~> 0.11)
82
+ simplecov_json_formatter (~> 0.1)
83
+ simplecov-html (0.12.3)
84
+ simplecov_json_formatter (0.1.4)
85
+ tzinfo (2.0.6)
86
+ concurrent-ruby (~> 1.0)
87
+ unf (0.1.4)
88
+ unf_ext
89
+ unf_ext (0.0.8.2)
90
+ unicode-display_width (2.4.2)
91
+ upennlib-rubocop (1.1.0)
92
+ rubocop (~> 1.24)
93
+ rubocop-capybara
94
+ rubocop-performance
95
+ rubocop-rails
96
+ rubocop-rake
97
+ rubocop-rspec
98
+ webrick (1.8.1)
99
+ yard (0.9.34)
100
+
101
+ PLATFORMS
102
+ arm64-darwin-21
103
+ arm64-darwin-22
104
+ x86_64-linux
105
+
106
+ DEPENDENCIES
107
+ activesupport (~> 7)
108
+ library_stdnums (~> 1.6)
109
+ marc (~> 1.2)
110
+ nokogiri (~> 1.15)
111
+ rake (~> 13.0)
112
+ rspec (~> 3.12)
113
+ simplecov (~> 0.22)
114
+ upennlib-rubocop
115
+ webrick (~> 1.8)
116
+ yard (~> 0.9)
117
+
118
+ BUNDLED WITH
119
+ 2.4.10
data/README.md ADDED
@@ -0,0 +1,82 @@
1
+ # Penn Libraries MARC Parser
2
+
3
+ This gem embodies the received and newfound wisdom of Penn Libraries MARC parsing practice. The values returned by this
4
+ parser should be agnostic about the particular discovery system in which it is included. Most of this was extracted from
5
+ the "Nouveau Franklin" project aka [discovery_app](https://gitlab.library.upenn.edu/franklin/discovery-app).
6
+
7
+ When included in a project, it should be utilized like this:
8
+
9
+ ```ruby
10
+ parser = PennMARC::Parser.new # eventually we will pass in some mappings...
11
+ puts parser.title_show(marc_record) # Title intended for display
12
+ ```
13
+
14
+ All methods will require a `MARC::Record` object. For more about these, see the
15
+ [ruby-marc](https://github.com/ruby-marc/ruby-marc) gem documentation.
16
+
17
+ ## Development
18
+
19
+ ### Requirements
20
+ - Ruby 3.2.2 (other versions will probably work)
21
+
22
+ ### Setup
23
+
24
+ After cloning the repository and setting up Ruby for the project, run `bundle install` to install the gems.
25
+
26
+ ### Organization
27
+
28
+ Classes in the `helpers` directory bring together common fields that may share logic. `PennMARC::Util` holds methods
29
+ used for common tasks such as joining subfields.
30
+
31
+ ### Documentation
32
+
33
+ Highly descriptive and accurate documentation of MARC parsing practices will improve developer happiness, as well as
34
+ that of library collaborators. To this end, developers should utilize
35
+ [YARD documentation syntax](https://rubydoc.info/gems/yard/file/docs/GettingStarted.md) as appropriate.
36
+
37
+ A YARD documentation server can be run during development and will reload with updated docs as you work:
38
+
39
+ ```bash
40
+ yard server --reload
41
+ ```
42
+
43
+ When successful, the documentation pages will be available at [http://localhost:8808](http://localhost:8808).
44
+
45
+ ### Style
46
+
47
+ This gem utilizes the [upennlib-rubocop](https://gitlab.library.upenn.edu/dld/upennlib-rubocop)
48
+ gem to enforce a consistent style.
49
+
50
+ To run rubocop with the configuration:
51
+
52
+ ```bash
53
+ rubocop
54
+ ```
55
+
56
+ ### Testing
57
+
58
+ Testing is done with `rspec`. Test coverage should approach 100% given the relative simplicity of this gem.
59
+
60
+ To run the test suite:
61
+
62
+ ```bash
63
+ rspec
64
+ ```
65
+
66
+ ## QA
67
+
68
+ ### Checking output of an arbitrary MARC XML file
69
+
70
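+ TODO: document this workflow. The rake task shown below is the intended invocation; it is still listed as outstanding work in the TODO section and may not be available yet.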
71
+
72
+ ```bash
73
+ MARC_FILE=path/to/marc.xml bundle exec rake pennmarc:parse
74
+ ```
75
+
76
+ ## TODO
77
+ - rake task or some similar command to return a full set of values extracted from a specified marcxml file
78
+ - hosting of yard output files?
79
+ - mappings (locations, call number, languages)
80
+ - pipeline to run tests and publish to RubyGems
81
+ - rubocop check
82
+ - rdoc/yard coverage checks?