pennmarc 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rspec +2 -0
- data/.ruby-version +1 -0
- data/Gemfile +23 -0
- data/Gemfile.lock +119 -0
- data/README.md +82 -0
- data/legacy/indexer.rb +568 -0
- data/legacy/marc.rb +2964 -0
- data/legacy/test_file_output.json +49 -0
- data/lib/pennmarc/encoding_level.rb +43 -0
- data/lib/pennmarc/enriched_marc.rb +36 -0
- data/lib/pennmarc/heading_control.rb +11 -0
- data/lib/pennmarc/helpers/citation.rb +31 -0
- data/lib/pennmarc/helpers/creator.rb +237 -0
- data/lib/pennmarc/helpers/database.rb +89 -0
- data/lib/pennmarc/helpers/date.rb +85 -0
- data/lib/pennmarc/helpers/edition.rb +90 -0
- data/lib/pennmarc/helpers/format.rb +312 -0
- data/lib/pennmarc/helpers/genre.rb +71 -0
- data/lib/pennmarc/helpers/helper.rb +11 -0
- data/lib/pennmarc/helpers/identifier.rb +134 -0
- data/lib/pennmarc/helpers/language.rb +37 -0
- data/lib/pennmarc/helpers/link.rb +12 -0
- data/lib/pennmarc/helpers/location.rb +97 -0
- data/lib/pennmarc/helpers/note.rb +132 -0
- data/lib/pennmarc/helpers/production.rb +131 -0
- data/lib/pennmarc/helpers/relation.rb +135 -0
- data/lib/pennmarc/helpers/series.rb +118 -0
- data/lib/pennmarc/helpers/subject.rb +304 -0
- data/lib/pennmarc/helpers/title.rb +197 -0
- data/lib/pennmarc/mappings/language.yml +516 -0
- data/lib/pennmarc/mappings/locations.yml +1801 -0
- data/lib/pennmarc/mappings/relator.yml +263 -0
- data/lib/pennmarc/parser.rb +177 -0
- data/lib/pennmarc/util.rb +240 -0
- data/lib/pennmarc.rb +6 -0
- data/pennmarc.gemspec +22 -0
- data/spec/fixtures/marcxml/test.xml +167 -0
- data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
- data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
- data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
- data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
- data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
- data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
- data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
- data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
- data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
- data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
- data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
- data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
- data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
- data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
- data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
- data/spec/lib/pennmarc/parser_spec.rb +13 -0
- data/spec/spec_helper.rb +104 -0
- data/spec/support/marc_spec_helpers.rb +84 -0
- metadata +171 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 5480cac8bd9e0c8ae5fe096945f7dccb9c56730ee33f9fa35cee0aaee202fa6b
|
4
|
+
data.tar.gz: 38c4aa42f396b061cc7813320cc714081b85563fc9bf2f689588887018610863
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: e577b914ce87e01bdc5ddd95680b7e547398d40c12bda4f52ebf258abd80882b87bd53948dd306a3fa82935694eb57f0196c86819f602f5049d8932a5a8529e3
|
7
|
+
data.tar.gz: ede363f6fffbd352cd8cdb6ad97290f97a1bd14f7665788a82303c75625244d78d2ef301497d3892ff2c886e6c04e0fa8fd1685d6bfdf2bd78486155c79da117
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
3.2.2
|
data/Gemfile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source 'https://rubygems.org'
|
4
|
+
|
5
|
+
gem 'activesupport', '~> 7'
|
6
|
+
gem 'library_stdnums', '~> 1.6'
|
7
|
+
gem 'marc', '~> 1.2'
|
8
|
+
gem 'nokogiri', '~> 1.15'
|
9
|
+
gem 'rake', '~> 13.0'
|
10
|
+
|
11
|
+
group :test, :development do
|
12
|
+
gem 'rspec', '~> 3.12'
|
13
|
+
end
|
14
|
+
|
15
|
+
group :test do
|
16
|
+
gem 'simplecov', '~> 0.22'
|
17
|
+
end
|
18
|
+
|
19
|
+
group :development do
|
20
|
+
gem 'upennlib-rubocop', require: false
|
21
|
+
gem 'webrick', '~> 1.8'
|
22
|
+
gem 'yard', '~> 0.9'
|
23
|
+
end
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
GEM
|
2
|
+
remote: https://rubygems.org/
|
3
|
+
specs:
|
4
|
+
activesupport (7.0.5)
|
5
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
6
|
+
i18n (>= 1.6, < 2)
|
7
|
+
minitest (>= 5.1)
|
8
|
+
tzinfo (~> 2.0)
|
9
|
+
ast (2.4.2)
|
10
|
+
concurrent-ruby (1.2.2)
|
11
|
+
diff-lcs (1.5.0)
|
12
|
+
docile (1.4.0)
|
13
|
+
i18n (1.13.0)
|
14
|
+
concurrent-ruby (~> 1.0)
|
15
|
+
json (2.6.3)
|
16
|
+
library_stdnums (1.6.0)
|
17
|
+
marc (1.2.0)
|
18
|
+
rexml
|
19
|
+
scrub_rb (>= 1.0.1, < 2)
|
20
|
+
unf
|
21
|
+
minitest (5.18.0)
|
22
|
+
nokogiri (1.15.2-arm64-darwin)
|
23
|
+
racc (~> 1.4)
|
24
|
+
nokogiri (1.15.2-x86_64-linux)
|
25
|
+
racc (~> 1.4)
|
26
|
+
parallel (1.23.0)
|
27
|
+
parser (3.2.2.1)
|
28
|
+
ast (~> 2.4.1)
|
29
|
+
racc (1.6.2)
|
30
|
+
rack (3.0.7)
|
31
|
+
rainbow (3.1.1)
|
32
|
+
rake (13.0.6)
|
33
|
+
regexp_parser (2.8.0)
|
34
|
+
rexml (3.2.5)
|
35
|
+
rspec (3.12.0)
|
36
|
+
rspec-core (~> 3.12.0)
|
37
|
+
rspec-expectations (~> 3.12.0)
|
38
|
+
rspec-mocks (~> 3.12.0)
|
39
|
+
rspec-core (3.12.2)
|
40
|
+
rspec-support (~> 3.12.0)
|
41
|
+
rspec-expectations (3.12.3)
|
42
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
43
|
+
rspec-support (~> 3.12.0)
|
44
|
+
rspec-mocks (3.12.5)
|
45
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
46
|
+
rspec-support (~> 3.12.0)
|
47
|
+
rspec-support (3.12.0)
|
48
|
+
rubocop (1.51.0)
|
49
|
+
json (~> 2.3)
|
50
|
+
parallel (~> 1.10)
|
51
|
+
parser (>= 3.2.0.0)
|
52
|
+
rainbow (>= 2.2.2, < 4.0)
|
53
|
+
regexp_parser (>= 1.8, < 3.0)
|
54
|
+
rexml (>= 3.2.5, < 4.0)
|
55
|
+
rubocop-ast (>= 1.28.0, < 2.0)
|
56
|
+
ruby-progressbar (~> 1.7)
|
57
|
+
unicode-display_width (>= 2.4.0, < 3.0)
|
58
|
+
rubocop-ast (1.28.1)
|
59
|
+
parser (>= 3.2.1.0)
|
60
|
+
rubocop-capybara (2.18.0)
|
61
|
+
rubocop (~> 1.41)
|
62
|
+
rubocop-factory_bot (2.23.1)
|
63
|
+
rubocop (~> 1.33)
|
64
|
+
rubocop-performance (1.18.0)
|
65
|
+
rubocop (>= 1.7.0, < 2.0)
|
66
|
+
rubocop-ast (>= 0.4.0)
|
67
|
+
rubocop-rails (2.19.1)
|
68
|
+
activesupport (>= 4.2.0)
|
69
|
+
rack (>= 1.1)
|
70
|
+
rubocop (>= 1.33.0, < 2.0)
|
71
|
+
rubocop-rake (0.6.0)
|
72
|
+
rubocop (~> 1.0)
|
73
|
+
rubocop-rspec (2.22.0)
|
74
|
+
rubocop (~> 1.33)
|
75
|
+
rubocop-capybara (~> 2.17)
|
76
|
+
rubocop-factory_bot (~> 2.22)
|
77
|
+
ruby-progressbar (1.13.0)
|
78
|
+
scrub_rb (1.0.1)
|
79
|
+
simplecov (0.22.0)
|
80
|
+
docile (~> 1.1)
|
81
|
+
simplecov-html (~> 0.11)
|
82
|
+
simplecov_json_formatter (~> 0.1)
|
83
|
+
simplecov-html (0.12.3)
|
84
|
+
simplecov_json_formatter (0.1.4)
|
85
|
+
tzinfo (2.0.6)
|
86
|
+
concurrent-ruby (~> 1.0)
|
87
|
+
unf (0.1.4)
|
88
|
+
unf_ext
|
89
|
+
unf_ext (0.0.8.2)
|
90
|
+
unicode-display_width (2.4.2)
|
91
|
+
upennlib-rubocop (1.1.0)
|
92
|
+
rubocop (~> 1.24)
|
93
|
+
rubocop-capybara
|
94
|
+
rubocop-performance
|
95
|
+
rubocop-rails
|
96
|
+
rubocop-rake
|
97
|
+
rubocop-rspec
|
98
|
+
webrick (1.8.1)
|
99
|
+
yard (0.9.34)
|
100
|
+
|
101
|
+
PLATFORMS
|
102
|
+
arm64-darwin-21
|
103
|
+
arm64-darwin-22
|
104
|
+
x86_64-linux
|
105
|
+
|
106
|
+
DEPENDENCIES
|
107
|
+
activesupport (~> 7)
|
108
|
+
library_stdnums (~> 1.6)
|
109
|
+
marc (~> 1.2)
|
110
|
+
nokogiri (~> 1.15)
|
111
|
+
rake (~> 13.0)
|
112
|
+
rspec (~> 3.12)
|
113
|
+
simplecov (~> 0.22)
|
114
|
+
upennlib-rubocop
|
115
|
+
webrick (~> 1.8)
|
116
|
+
yard (~> 0.9)
|
117
|
+
|
118
|
+
BUNDLED WITH
|
119
|
+
2.4.10
|
data/README.md
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
# Penn Libraries MARC Parser
|
2
|
+
|
3
|
+
This gem embodies the received and newfound wisdom of Penn Libraries MARC parsing practice. The values returned by this
|
4
|
+
parser should be agnostic about the particular discovery system in which it is included. Most of this code was extracted from
|
5
|
+
the "Nouveau Franklin" project, also known as [discovery_app](https://gitlab.library.upenn.edu/franklin/discovery-app).
|
6
|
+
|
7
|
+
When included in a project, it should be utilized like this:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
parser = PennMARC::Parser.new # eventually we will pass in some mappings...
|
11
|
+
puts parser.title_show(marc_record) # Title intended for display
|
12
|
+
```
|
13
|
+
|
14
|
+
All methods will require a `MARC::Record` object. For more about these, see the
|
15
|
+
[ruby-marc](https://github.com/ruby-marc/ruby-marc) gem documentation.
|
16
|
+
|
17
|
+
## Development
|
18
|
+
|
19
|
+
### Requirements
|
20
|
+
- Ruby 3.2.2 (other versions will probably work)
|
21
|
+
|
22
|
+
### Setup
|
23
|
+
|
24
|
+
After cloning the repository and setting up Ruby for the project, run `bundle install` to install the gems.
|
25
|
+
|
26
|
+
### Organization
|
27
|
+
|
28
|
+
Classes in the `helpers` directory bring together common fields that may share logic. `PennMARC::Util` holds methods
|
29
|
+
used for common tasks such as joining subfields.
|
30
|
+
|
31
|
+
### Documentation
|
32
|
+
|
33
|
+
Highly descriptive and accurate documentation of MARC parsing practices will improve developer happiness, as well as
|
34
|
+
that of library collaborators. To this end, developers should utilize
|
35
|
+
[YARD documentation syntax](https://rubydoc.info/gems/yard/file/docs/GettingStarted.md) as appropriate.
|
36
|
+
|
37
|
+
A YARD documentation server can be run during development and will reload with updated docs as you work:
|
38
|
+
|
39
|
+
```bash
|
40
|
+
yard server --reload
|
41
|
+
```
|
42
|
+
|
43
|
+
When successful, the documentation pages will be available at [http://localhost:8808](http://localhost:8808).
|
44
|
+
|
45
|
+
### Style
|
46
|
+
|
47
|
+
This gem utilizes the [upennlib-rubocop](https://gitlab.library.upenn.edu/dld/upennlib-rubocop)
|
48
|
+
gem to enforce a consistent style.
|
49
|
+
|
50
|
+
To run RuboCop with the project's configuration:
|
51
|
+
|
52
|
+
```bash
|
53
|
+
rubocop
|
54
|
+
```
|
55
|
+
|
56
|
+
### Testing
|
57
|
+
|
58
|
+
Testing is done with `rspec`. Test coverage should approach 100% given the relative simplicity of this gem.
|
59
|
+
|
60
|
+
To run the test suite:
|
61
|
+
|
62
|
+
```bash
|
63
|
+
rspec
|
64
|
+
```
|
65
|
+
|
66
|
+
## QA
|
67
|
+
|
68
|
+
### Checking output of an arbitrary MARC XML file
|
69
|
+
|
70
|
+
TODO
|
71
|
+
|
72
|
+
```bash
|
73
|
+
MARC_FILE=path/to/marc.xml bundle exec rake pennmarc:parse
|
74
|
+
```
|
75
|
+
|
76
|
+
## TODO
|
77
|
+
- rake task or some similar command to return a full set of values extracted from a specified marcxml file
|
78
|
+
- hosting of yard output files?
|
79
|
+
- mappings (locations, call number, languages)
|
80
|
+
- Pipeline to run tests and publish to Rubygems
|
81
|
+
- rubocop check
|
82
|
+
- rdoc/yard coverage checks?
|