pennmarc 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +23 -0
  6. data/Gemfile.lock +119 -0
  7. data/README.md +82 -0
  8. data/legacy/indexer.rb +568 -0
  9. data/legacy/marc.rb +2964 -0
  10. data/legacy/test_file_output.json +49 -0
  11. data/lib/pennmarc/encoding_level.rb +43 -0
  12. data/lib/pennmarc/enriched_marc.rb +36 -0
  13. data/lib/pennmarc/heading_control.rb +11 -0
  14. data/lib/pennmarc/helpers/citation.rb +31 -0
  15. data/lib/pennmarc/helpers/creator.rb +237 -0
  16. data/lib/pennmarc/helpers/database.rb +89 -0
  17. data/lib/pennmarc/helpers/date.rb +85 -0
  18. data/lib/pennmarc/helpers/edition.rb +90 -0
  19. data/lib/pennmarc/helpers/format.rb +312 -0
  20. data/lib/pennmarc/helpers/genre.rb +71 -0
  21. data/lib/pennmarc/helpers/helper.rb +11 -0
  22. data/lib/pennmarc/helpers/identifier.rb +134 -0
  23. data/lib/pennmarc/helpers/language.rb +37 -0
  24. data/lib/pennmarc/helpers/link.rb +12 -0
  25. data/lib/pennmarc/helpers/location.rb +97 -0
  26. data/lib/pennmarc/helpers/note.rb +132 -0
  27. data/lib/pennmarc/helpers/production.rb +131 -0
  28. data/lib/pennmarc/helpers/relation.rb +135 -0
  29. data/lib/pennmarc/helpers/series.rb +118 -0
  30. data/lib/pennmarc/helpers/subject.rb +304 -0
  31. data/lib/pennmarc/helpers/title.rb +197 -0
  32. data/lib/pennmarc/mappings/language.yml +516 -0
  33. data/lib/pennmarc/mappings/locations.yml +1801 -0
  34. data/lib/pennmarc/mappings/relator.yml +263 -0
  35. data/lib/pennmarc/parser.rb +177 -0
  36. data/lib/pennmarc/util.rb +240 -0
  37. data/lib/pennmarc.rb +6 -0
  38. data/pennmarc.gemspec +22 -0
  39. data/spec/fixtures/marcxml/test.xml +167 -0
  40. data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
  41. data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
  42. data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
  43. data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
  44. data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
  45. data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
  46. data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
  47. data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
  48. data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
  49. data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
  50. data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
  51. data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
  52. data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
  53. data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
  54. data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
  55. data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
  56. data/spec/lib/pennmarc/parser_spec.rb +13 -0
  57. data/spec/spec_helper.rb +104 -0
  58. data/spec/support/marc_spec_helpers.rb +84 -0
  59. metadata +171 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 5480cac8bd9e0c8ae5fe096945f7dccb9c56730ee33f9fa35cee0aaee202fa6b
4
+ data.tar.gz: 38c4aa42f396b061cc7813320cc714081b85563fc9bf2f689588887018610863
5
+ SHA512:
6
+ metadata.gz: e577b914ce87e01bdc5ddd95680b7e547398d40c12bda4f52ebf258abd80882b87bd53948dd306a3fa82935694eb57f0196c86819f602f5049d8932a5a8529e3
7
+ data.tar.gz: ede363f6fffbd352cd8cdb6ad97290f97a1bd14f7665788a82303c75625244d78d2ef301497d3892ff2c886e6c04e0fa8fd1685d6bfdf2bd78486155c79da117
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+
2
+ # Ignore YARD products
3
+ .yardoc
4
+ doc/
5
+
6
+ *.gem
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 3.2.2
data/Gemfile ADDED
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ gem 'activesupport', '~> 7'
6
+ gem 'library_stdnums', '~> 1.6'
7
+ gem 'marc', '~> 1.2'
8
+ gem 'nokogiri', '~> 1.15'
9
+ gem 'rake', '~> 13.0'
10
+
11
+ group :test, :development do
12
+ gem 'rspec', '~> 3.12'
13
+ end
14
+
15
+ group :test do
16
+ gem 'simplecov', '~> 0.22'
17
+ end
18
+
19
+ group :development do
20
+ gem 'upennlib-rubocop', require: false
21
+ gem 'webrick', '~> 1.8'
22
+ gem 'yard', '~> 0.9'
23
+ end
data/Gemfile.lock ADDED
@@ -0,0 +1,119 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ activesupport (7.0.5)
5
+ concurrent-ruby (~> 1.0, >= 1.0.2)
6
+ i18n (>= 1.6, < 2)
7
+ minitest (>= 5.1)
8
+ tzinfo (~> 2.0)
9
+ ast (2.4.2)
10
+ concurrent-ruby (1.2.2)
11
+ diff-lcs (1.5.0)
12
+ docile (1.4.0)
13
+ i18n (1.13.0)
14
+ concurrent-ruby (~> 1.0)
15
+ json (2.6.3)
16
+ library_stdnums (1.6.0)
17
+ marc (1.2.0)
18
+ rexml
19
+ scrub_rb (>= 1.0.1, < 2)
20
+ unf
21
+ minitest (5.18.0)
22
+ nokogiri (1.15.2-arm64-darwin)
23
+ racc (~> 1.4)
24
+ nokogiri (1.15.2-x86_64-linux)
25
+ racc (~> 1.4)
26
+ parallel (1.23.0)
27
+ parser (3.2.2.1)
28
+ ast (~> 2.4.1)
29
+ racc (1.6.2)
30
+ rack (3.0.7)
31
+ rainbow (3.1.1)
32
+ rake (13.0.6)
33
+ regexp_parser (2.8.0)
34
+ rexml (3.2.5)
35
+ rspec (3.12.0)
36
+ rspec-core (~> 3.12.0)
37
+ rspec-expectations (~> 3.12.0)
38
+ rspec-mocks (~> 3.12.0)
39
+ rspec-core (3.12.2)
40
+ rspec-support (~> 3.12.0)
41
+ rspec-expectations (3.12.3)
42
+ diff-lcs (>= 1.2.0, < 2.0)
43
+ rspec-support (~> 3.12.0)
44
+ rspec-mocks (3.12.5)
45
+ diff-lcs (>= 1.2.0, < 2.0)
46
+ rspec-support (~> 3.12.0)
47
+ rspec-support (3.12.0)
48
+ rubocop (1.51.0)
49
+ json (~> 2.3)
50
+ parallel (~> 1.10)
51
+ parser (>= 3.2.0.0)
52
+ rainbow (>= 2.2.2, < 4.0)
53
+ regexp_parser (>= 1.8, < 3.0)
54
+ rexml (>= 3.2.5, < 4.0)
55
+ rubocop-ast (>= 1.28.0, < 2.0)
56
+ ruby-progressbar (~> 1.7)
57
+ unicode-display_width (>= 2.4.0, < 3.0)
58
+ rubocop-ast (1.28.1)
59
+ parser (>= 3.2.1.0)
60
+ rubocop-capybara (2.18.0)
61
+ rubocop (~> 1.41)
62
+ rubocop-factory_bot (2.23.1)
63
+ rubocop (~> 1.33)
64
+ rubocop-performance (1.18.0)
65
+ rubocop (>= 1.7.0, < 2.0)
66
+ rubocop-ast (>= 0.4.0)
67
+ rubocop-rails (2.19.1)
68
+ activesupport (>= 4.2.0)
69
+ rack (>= 1.1)
70
+ rubocop (>= 1.33.0, < 2.0)
71
+ rubocop-rake (0.6.0)
72
+ rubocop (~> 1.0)
73
+ rubocop-rspec (2.22.0)
74
+ rubocop (~> 1.33)
75
+ rubocop-capybara (~> 2.17)
76
+ rubocop-factory_bot (~> 2.22)
77
+ ruby-progressbar (1.13.0)
78
+ scrub_rb (1.0.1)
79
+ simplecov (0.22.0)
80
+ docile (~> 1.1)
81
+ simplecov-html (~> 0.11)
82
+ simplecov_json_formatter (~> 0.1)
83
+ simplecov-html (0.12.3)
84
+ simplecov_json_formatter (0.1.4)
85
+ tzinfo (2.0.6)
86
+ concurrent-ruby (~> 1.0)
87
+ unf (0.1.4)
88
+ unf_ext
89
+ unf_ext (0.0.8.2)
90
+ unicode-display_width (2.4.2)
91
+ upennlib-rubocop (1.1.0)
92
+ rubocop (~> 1.24)
93
+ rubocop-capybara
94
+ rubocop-performance
95
+ rubocop-rails
96
+ rubocop-rake
97
+ rubocop-rspec
98
+ webrick (1.8.1)
99
+ yard (0.9.34)
100
+
101
+ PLATFORMS
102
+ arm64-darwin-21
103
+ arm64-darwin-22
104
+ x86_64-linux
105
+
106
+ DEPENDENCIES
107
+ activesupport (~> 7)
108
+ library_stdnums (~> 1.6)
109
+ marc (~> 1.2)
110
+ nokogiri (~> 1.15)
111
+ rake (~> 13.0)
112
+ rspec (~> 3.12)
113
+ simplecov (~> 0.22)
114
+ upennlib-rubocop
115
+ webrick (~> 1.8)
116
+ yard (~> 0.9)
117
+
118
+ BUNDLED WITH
119
+ 2.4.10
data/README.md ADDED
@@ -0,0 +1,82 @@
1
+ # Penn Libraries MARC Parser
2
+
3
+ This gem embodies the received and newfound wisdom of Penn Libraries MARC parsing practice. The values returned by this
4
+ parser should be agnostic about the particular discovery system in which it is included. Most of this was extracted from
5
+ the "Nouveau Franklin" project aka [discovery_app](https://gitlab.library.upenn.edu/franklin/discovery-app).
6
+
7
+ When included in a project, it should be utilized like this:
8
+
9
+ ```ruby
10
+ parser = PennMARC::Parser.new # eventually we will pass in some mappings...
11
+ puts parser.title_show(marc_record) # Title intended for display
12
+ ```
13
+
14
+ All methods will require a `MARC::Record` object. For more about these, see the
15
+ [ruby-marc](https://github.com/ruby-marc/ruby-marc) gem documentation.
16
+
17
+ ## Development
18
+
19
+ ### Requirements
20
+ - Ruby 3.2.2 (other versions will probably work)
21
+
22
+ ### Setup
23
+
24
+ After cloning the repository and setting up Ruby for the project, run `bundle install` to install the gems.
25
+
26
+ ### Organization
27
+
28
+ Classes in the `helpers` directory bring together common fields that may share logic. `PennMARC::Util` holds methods
29
+ used for common tasks such as joining subfields.
30
+
31
+ ### Documentation
32
+
33
+ Highly descriptive and accurate documentation of MARC parsing practices will improve developer happiness, as well as
34
+ that of library collaborators. To this end, developers should utilize
35
+ [YARD documentation syntax](https://rubydoc.info/gems/yard/file/docs/GettingStarted.md) as appropriate.
36
+
37
+ A YARD documentation server can be run during development and will reload with updated docs as you work:
38
+
39
+ ```bash
40
+ yard server --reload
41
+ ```
42
+
43
+ When successful, the documentation pages will be available at [http://localhost:8808](http://localhost:8808).
44
+
45
+ ### Style
46
+
47
+ This gem utilizes the [upennlib-rubocop](https://gitlab.library.upenn.edu/dld/upennlib-rubocop)
48
+ gem to enforce a consistent style.
49
+
50
+ To run rubocop with the configuration:
51
+
52
+ ```bash
53
+ rubocop
54
+ ```
55
+
56
+ ### Testing
57
+
58
+ Testing is done with `rspec`. Test coverage should approach 100% given the relative simplicity of this gem.
59
+
60
+ To run the test suite:
61
+
62
+ ```bash
63
+ rspec
64
+ ```
65
+
66
+ ## QA
67
+
68
+ ### Checking output of an arbitrary MARC XML file
69
+
70
+ TODO: this rake task is not yet available (see the TODO list below). Intended usage:
71
+
72
+ ```bash
73
+ MARC_FILE=path/to/marc.xml bundle exec rake pennmarc:parse
74
+ ```
75
+
76
+ ## TODO
77
+ - rake task or some similar command to return a full set of values extracted from a specified marcxml file
78
+ - hosting of yard output files?
79
+ - mappings (locations, call number, languages)
80
+ - Pipeline to run tests and publish to Rubygems
81
+ - rubocop check
82
+ - rdoc/yard coverage checks?