mets_converter 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +2 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +4 -0
- data/LICENSE +15 -0
- data/README.md +49 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/mets_to_yaml +84 -0
- data/bin/setup +8 -0
- data/lib/indent.rb +5 -0
- data/lib/mets_converter/logging.rb +26 -0
- data/lib/mets_converter/mets_parser.rb +37 -0
- data/lib/mets_converter/version.rb +3 -0
- data/lib/mets_converter/yaml_builder.rb +147 -0
- data/lib/mets_converter.rb +13 -0
- data/mets_converter.gemspec +27 -0
- data/spec/fixtures/35556002332765.mets.xml +2510 -0
- data/spec/fixtures/test.yml +257 -0
- data/spec/fixtures/test_with_options.yml +256 -0
- data/spec/mets_converter/mets_parser_spec.rb +33 -0
- data/spec/mets_converter/yaml_builder_spec.rb +28 -0
- data/spec/mets_converter_spec.rb +7 -0
- data/spec/spec_helper.rb +11 -0
- data/xsd/mets.xsd +1768 -0
- metadata +146 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: c5dc746835db25047ca43467efca3ea3be74ba5ab281bd0d2bb89aef305a29da
|
4
|
+
data.tar.gz: 9fe15a4c78a4ac5b503b8e6f9879ec3860fbace9b67ac2a67006107a41a689e7
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 7d99159d56af0265d969230fb63f99a17abce666396f6299e60d5afb602ab8cfae7f5514242dffd14d968cb385f24762f953b5586641b7481f32060469eb3888
|
7
|
+
data.tar.gz: e571d6d8e43c96d00be1b1d5f4f1ba8427b21ceb4a4636df087797f5f6966664cfd1c88a08ead5fd0bff9457b43004fb231a19e6e977f6b95d336637b5c4abf0
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
2
|
+
|
3
|
+
## Our Pledge
|
4
|
+
|
5
|
+
In the interest of fostering an open and welcoming environment, we as
|
6
|
+
contributors and maintainers pledge to making participation in our project and
|
7
|
+
our community a harassment-free experience for everyone, regardless of age, body
|
8
|
+
size, disability, ethnicity, gender identity and expression, level of experience,
|
9
|
+
nationality, personal appearance, race, religion, or sexual identity and
|
10
|
+
orientation.
|
11
|
+
|
12
|
+
## Our Standards
|
13
|
+
|
14
|
+
Examples of behavior that contributes to creating a positive environment
|
15
|
+
include:
|
16
|
+
|
17
|
+
* Using welcoming and inclusive language
|
18
|
+
* Being respectful of differing viewpoints and experiences
|
19
|
+
* Gracefully accepting constructive criticism
|
20
|
+
* Focusing on what is best for the community
|
21
|
+
* Showing empathy towards other community members
|
22
|
+
|
23
|
+
Examples of unacceptable behavior by participants include:
|
24
|
+
|
25
|
+
* The use of sexualized language or imagery and unwelcome sexual attention or
|
26
|
+
advances
|
27
|
+
* Trolling, insulting/derogatory comments, and personal or political attacks
|
28
|
+
* Public or private harassment
|
29
|
+
* Publishing others' private information, such as a physical or electronic
|
30
|
+
address, without explicit permission
|
31
|
+
* Other conduct which could reasonably be considered inappropriate in a
|
32
|
+
professional setting
|
33
|
+
|
34
|
+
## Our Responsibilities
|
35
|
+
|
36
|
+
Project maintainers are responsible for clarifying the standards of acceptable
|
37
|
+
behavior and are expected to take appropriate and fair corrective action in
|
38
|
+
response to any instances of unacceptable behavior.
|
39
|
+
|
40
|
+
Project maintainers have the right and responsibility to remove, edit, or
|
41
|
+
reject comments, commits, code, wiki edits, issues, and other contributions
|
42
|
+
that are not aligned to this Code of Conduct, or to ban temporarily or
|
43
|
+
permanently any contributor for other behaviors that they deem inappropriate,
|
44
|
+
threatening, offensive, or harmful.
|
45
|
+
|
46
|
+
## Scope
|
47
|
+
|
48
|
+
This Code of Conduct applies both within project spaces and in public spaces
|
49
|
+
when an individual is representing the project or its community. Examples of
|
50
|
+
representing a project or community include using an official project e-mail
|
51
|
+
address, posting via an official social media account, or acting as an appointed
|
52
|
+
representative at an online or offline event. Representation of a project may be
|
53
|
+
further defined and clarified by project maintainers.
|
54
|
+
|
55
|
+
## Enforcement
|
56
|
+
|
57
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
58
|
+
reported by contacting the project team at brendan-quinn@northwestern.edu. All
|
59
|
+
complaints will be reviewed and investigated and will result in a response that
|
60
|
+
is deemed necessary and appropriate to the circumstances. The project team is
|
61
|
+
obligated to maintain confidentiality with regard to the reporter of an incident.
|
62
|
+
Further details of specific enforcement policies may be posted separately.
|
63
|
+
|
64
|
+
Project maintainers who do not follow or enforce the Code of Conduct in good
|
65
|
+
faith may face temporary or permanent repercussions as determined by other
|
66
|
+
members of the project's leadership.
|
67
|
+
|
68
|
+
## Attribution
|
69
|
+
|
70
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
|
71
|
+
available at [http://contributor-covenant.org/version/1/4][version]
|
72
|
+
|
73
|
+
[homepage]: http://contributor-covenant.org
|
74
|
+
[version]: http://contributor-covenant.org/version/1/4/
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
##########################################################################
|
2
|
+
# Copyright 2019 Northwestern University Library
|
3
|
+
# Additional copyright may be held by others, as reflected in the commit log
|
4
|
+
#
|
5
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
6
|
+
# you may not use this file except in compliance with the License.
|
7
|
+
# You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing, software
|
12
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
# See the License for the specific language governing permissions and
|
15
|
+
# limitations under the License.
|
data/README.md
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
# MetsConverter
|
2
|
+
|
3
|
+
[![Build Status](https://travis-ci.com/nulib/mets_converter.svg)](https://travis-ci.com/nulib/mets_converter)
|
4
|
+
|
5
|
+
Convert a METS XML file into a YAML (`.yml`) file that follows the HathiTrust specifications for ingest. Note: this project was developed to satisfy Northwestern Libraries' specific use case, so many values are hard-coded in `MetsConverter::YamlBuilder#build`.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'mets_converter'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install mets_converter
|
22
|
+
|
23
|
+
Once installed, the `mets_to_yaml` command line utility should be available in your terminal.
|
24
|
+
|
25
|
+
## Usage
|
26
|
+
|
27
|
+
Run `mets_to_yaml` without any arguments to see help text.
|
28
|
+
|
29
|
+
```sh
|
30
|
+
# Running the mets_to_yaml command
|
31
|
+
mets_to_yaml [options] input_file output_file_basename
|
32
|
+
```
|
33
|
+
|
34
|
+
```sh
|
35
|
+
# Example with optional arguments
|
36
|
+
mets_to_yaml --force --resolution="300" /cygdrive/e/books/limb_output/35556004429411/35556004429411.mets.xml meta
|
37
|
+
|
38
|
+
# meta.yml will be created in the same directory as the input file
|
39
|
+
```
|
40
|
+
|
41
|
+
## Development
|
42
|
+
|
43
|
+
After checking out the repo, run `bundle install` to install dependencies. Then, run `bundle exec rspec` to run the tests.
|
44
|
+
|
45
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
46
|
+
|
47
|
+
## Contributing
|
48
|
+
|
49
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/nulib/mets_converter. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
data/Rakefile
ADDED
data/bin/console
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'mets_converter'
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require 'irb'
|
14
|
+
IRB.start(__FILE__)
|
data/bin/mets_to_yaml
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'mets_converter'
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
# Command-line options collected by the parser below.
options = {}

option_parser = OptionParser.new do |opts| # rubocop:disable Metrics/BlockLength
  executable_name = File.basename($PROGRAM_NAME)

  # Single banner assignment; the option summary is appended after "Options:".
  opts.banner = <<~EOS
    Convert a mets xml file into a yml file according to the Hathi Trust specifications for ingest
    Usage: #{executable_name} [options] input_file output_file_basename
    Example usage: #{executable_name} --force --resolution="300" /cygdrive/e/books/limb_output/35556004429411/35556004429411.mets.xml meta
    Example output: meta.yml
    Options:
  EOS

  opts.on('--[no-]force',
          'Overwrite existing files') do |force|
    options[:force] = force
  end

  opts.on('--suprascan',
          '"SupraScan Quartz A1" will override default "Kirtas APT 1200"') do
    options[:suprascan] = true
  end

  # DecimalInteger makes OptionParser reject non-numeric values with
  # OptionParser::InvalidArgument, which is reported by the rescue below.
  opts.on('--resolution=resolution_integer', OptionParser::DecimalInteger,
          'Enter numeric value of scanning resolution to override default 300') do |resolution|
    options[:resolution] = resolution
  end

  opts.on('--scanning_order_rtl',
          'Use if the scanning order is right-to-left to override default left-to-right') do
    options[:scanning_order_rtl] = true
  end

  opts.on('--reading_order_rtl',
          'Use if the reading order is right-to-left to override default left-to-right') do
    options[:reading_order_rtl] = true
  end
end

begin
  $stdout.puts 'Parsing optional arguments...'
  option_parser.parse!
  if ARGV.length < 2
    puts 'error: you must supply an input file and an output file name'
    puts option_parser.help
    exit 2
  end
rescue OptionParser::ParseError => e
  # ParseError is the common superclass of InvalidArgument, InvalidOption and
  # MissingArgument, so every malformed command line gets the usage text
  # instead of a backtrace.
  $stderr.puts e.message
  $stderr.puts option_parser
  exit 1
end

# set output file based on second command-line argument
output_file_basename = ARGV[1]
$stdout.puts "Building #{output_file_basename}.yml..."

# create mets object from first command-line argument
mets = MetsConverter::MetsParser.new(ARGV[0])
raise "The file #{ARGV[0]} does not contain valid XML" if mets.document.nil?

# build yaml combining mets data and command-line options
# (options are splatted because YamlBuilder#initialize takes keyword options;
# passing the hash positionally raises ArgumentError on Ruby 3)
yaml = MetsConverter::YamlBuilder.new(mets, **options).build
output_file = File.join(mets.input_file_location, "#{output_file_basename}.yml")

if File.exist?(output_file)
  unless options[:force]
    $stderr.puts "#{output_file} already exists, use --force to overwrite"
    exit 1
  end
  $stdout.puts "Overwriting #{output_file}"
end

# Setting NO_RUN in the environment skips writing the output file.
unless ENV['NO_RUN']
  File.write(output_file, yaml)
  $stdout.puts "Created #{output_file} successfully"
end
|
data/bin/setup
ADDED
data/lib/indent.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'logger'
|
2
|
+
|
3
|
+
module MetsConverter
  # Holds the single logger shared by the gem. The module-level methods manage
  # that logger; including the module gives a class a +logger+ instance method
  # that returns it.
  module Logging
    # Replaces the shared logger with a new INFO-level Logger writing to
    # +log_target+ and closes the logger it replaces, if there was one.
    #
    # @param log_target [IO, String] anything accepted by Logger.new
    # @return [Logger] the newly installed logger
    def self.initialize_logger(log_target = STDOUT)
      previous = defined?(@logger) ? @logger : nil
      @logger = Logger.new(log_target)
      @logger.level = Logger::INFO
      @logger.datetime_format = '%Y-%m-%d %H:%M:%S '
      previous&.close
      @logger
    end

    # @return [Logger] the shared logger, created on first use (STDOUT target)
    def self.logger
      defined?(@logger) ? @logger : initialize_logger
    end

    # Installs +log+ as the shared logger. Passing nil (or false) silences
    # logging by installing a logger that writes to the null device.
    #
    # The fallback previously called +.new+ on the current Logger *instance*,
    # which raised NoMethodError; it must instantiate the Logger class.
    #
    # @param log [Logger, nil]
    def self.logger=(log)
      @logger = log || Logger.new(File::NULL)
    end

    # Instance-level accessor for classes that include this module.
    #
    # @return [Logger] the shared logger
    def logger
      MetsConverter::Logging.logger
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
module MetsConverter
  # Parses a METS XML file, validates it against the bundled METS schema and
  # exposes the pieces of the document the converter needs.
  class MetsParser
    # Absolute path of the schema shipped in the gem's xsd/ directory. Resolved
    # from this file's location so validation does not depend on the current
    # working directory of the calling process.
    XSD_PATH = File.expand_path('../../xsd/mets.xsd', __dir__)

    # @return [Nokogiri::XML::Document, nil] the parsed document, or nil when
    #   the input was not well-formed XML
    attr_reader :document

    # @param file [String] path of the METS XML file to parse
    # @raise [RuntimeError] when the document does not validate against the schema
    def initialize(file)
      @file = file
      @document = parse_xml
    end

    # @return [String] directory that contains the input file
    def input_file_location
      File.dirname(@file)
    end

    # @return [String] the CREATEDATE attribute of the first metsHdr element
    #   with a fixed "-06:00" suffix appended
    #   NOTE(review): the offset is hard-coded; presumably the scanning site's
    #   local UTC offset - confirm.
    def capture_date
      document.xpath('//xmlns:metsHdr')[0].attr('CREATEDATE') + '-06:00'
    end

    # @return [Nokogiri::XML::NodeSet] page divs inside the logical structMap
    def pages
      document.search('structMap[@TYPE="logical"]//div[@TYPE="page"]')
    end

    private

    # Strictly parses the input file and validates it against the schema.
    # Block-form File.open closes both file handles as soon as they are read.
    #
    # @return [Nokogiri::XML::Document, nil] nil after logging when the XML is
    #   not well-formed (the logger's return value is no longer leaked as the
    #   "document")
    # @raise [RuntimeError] when the document is well-formed but schema-invalid
    def parse_xml
      doc = File.open(@file) { |input| Nokogiri::XML(input, &:strict) }
      xsd = File.open(XSD_PATH) { |schema| Nokogiri::XML::Schema(schema) }
      raise 'Invalid METS file' unless xsd.valid?(doc)

      doc
    rescue Nokogiri::XML::SyntaxError => e
      MetsConverter.logger.error "XML error: #{e}"
      nil
    end
  end
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
require_relative '../indent'
|
2
|
+
require 'pry'
|
3
|
+
|
4
|
+
module MetsConverter
  # Builds the ingest YAML text (a fixed metadata header followed by one
  # "pagedata" entry per labelled or numbered page) from a parsed METS document.
  class YamlBuilder
    # Section LABEL values that map directly onto a page label for the first
    # page of the section. A nil value means the first page gets no label and is
    # listed only when it carries a page number (ORDERLABEL).
    SECTION_LABELS = {
      'Front Matter' => nil,
      'Title' => 'TITLE',
      'Contents' => 'TABLE_OF_CONTENTS',
      'Preface' => 'PREFACE',
      'Back Matter' => nil,
      'Notes' => 'REFERENCES',
      'Bibliography' => 'REFERENCES',
      'Index' => 'INDEX'
    }.freeze

    attr_reader :document, :pages, :options

    # Options may be given either as keywords or as a single positional Hash
    # (the form a caller holding an options hash naturally uses; on Ruby 3 a
    # positional Hash is no longer converted to keywords).
    #
    # @param mets [#document, #pages] parsed METS wrapper
    # @param options [Hash, nil] positional options hash
    # @param keyword_options [Hash] the same options as keywords; recognised
    #   keys are :suprascan, :resolution, :scanning_order_rtl, :reading_order_rtl
    def initialize(mets, options = {}, **keyword_options)
      @document = mets.document
      @pages = mets.pages
      @options = options.to_h.merge(keyword_options)
    end

    # @return [String] the complete YAML text: header lines, then one indented
    #   line per page that has a label and/or a page number
    def build
      yaml = header_yaml
      pages.each do |element|
        line = page_line(element)
        yaml << line.indent(4) if line
      end
      yaml
    end

    # @param id [String] value of a file element's ID attribute
    # @return [String] the xlink:href of the file's FLocat with its first seven
    #   characters removed (presumably a "file://" prefix - confirm)
    def find_filename_by_file_id(id)
      document.xpath("//xmlns:file[@ID=\"#{id}\"]/xmlns:FLocat")[0]['xlink:href'][7..-1]
    end

    private

    # Descriptive and technical header. "pagedata:" is always the last line so
    # the page entries have a parent key regardless of the reading order option
    # (it used to be emitted only in the left-to-right branch).
    #
    # NOTE(review): the "-06:00" offset and the scanner/agent/tool values are
    # hard-coded for one institution's workflow.
    def header_yaml
      # e.g. <metsHdr CREATEDATE="2015-07-01T15:26:39" RECORDSTATUS="Complete">
      created = "#{document.xpath('//xmlns:metsHdr')[0].attr('CREATEDATE')}-06:00"
      suprascan = options[:suprascan]

      [
        "capture_date: #{created}",
        "scanner_make: #{suprascan ? 'SupraScan' : 'Kirtas'}",
        "scanner_model: #{suprascan ? 'Quartz A1' : 'APT 1200'}",
        'scanner_user: "Northwestern University Library: Repository & Digital Curation"',
        "contone_resolution_dpi: #{options[:resolution] || 300}",
        "image_compression_date: #{created}",
        'image_compression_agent: northwestern',
        'image_compression_tool: ["LIMB v3.1.0.0"]',
        "scanning_order: #{direction(options[:scanning_order_rtl])}",
        "reading_order: #{direction(options[:reading_order_rtl])}",
        'pagedata:'
      ].map { |line| "#{line}\n" }.join
    end

    # @return [String] YAML value for a scanning/reading order flag
    def direction(right_to_left)
      right_to_left ? 'right-to-left' : 'left-to-right'
    end

    # Builds the pagedata line for one page div, or nil when the page is not
    # listed.
    #
    # Since the yaml flattens out the xml structure, the first child of each
    # section gets special treatment (labels for covers, titles, chapters,
    # etc.); every other page is listed only when it has a page number.
    def page_line(element)
      # The JP2 file pointer identifies the image file for this page.
      file_id = element.xpath('./xmlns:fptr[starts-with(@FILEID, "JP2")]')[0]['FILEID']
      filename = find_filename_by_file_id(file_id)
      # to_s: a page without an ORDERLABEL attribute is treated as unnumbered.
      orderlabel = element['ORDERLABEL'].to_s

      return format_page_line(filename, orderlabel) unless element == element.parent.first_element_child

      recognized, label = classify_section(element)
      format_page_line(filename, orderlabel, label) if recognized
    end

    # Decides how the first page of a section is labelled.
    #
    # @return [Array(Boolean, String|nil)] whether the enclosing section is a
    #   recognised kind, and the label for its first page (nil = no label).
    #   First pages of unrecognised sections produce no pagedata line.
    def classify_section(element)
      parent = element.parent
      # to_s: a section div without a LABEL attribute is simply unrecognised.
      section = parent['LABEL'].to_s
      cover = section == 'Cover' && parent['TYPE'] == 'cover'
      chapter = section.start_with?('Chapter')

      if cover && parent == cover_divs.first
        [true, 'FRONT_COVER']
      elsif SECTION_LABELS.key?(section)
        [true, SECTION_LABELS[section]]
      elsif section == 'Cover' && parent['TYPE'] == 'appendix'
        [true, nil]
      elsif (section == 'Introduction' || chapter) && element == first_body_page
        # First page within the body, inside an "Introduction" or "Chapter..." div
        [true, 'FIRST_CONTENT_CHAPTER_START']
      elsif chapter || section == 'Appendix'
        [true, 'CHAPTER_START']
      elsif cover && parent == cover_divs.last
        [true, 'BACK_COVER']
      else
        [false, nil]
      end
    end

    # Cover divs of the logical structMap, looked up once per builder.
    def cover_divs
      @cover_divs ||= document.search('structMap[@TYPE="logical"]//div[@TYPE="cover"]')
    end

    # First page div of the first section inside the body div (may be nil),
    # looked up once per builder.
    def first_body_page
      return @first_body_page if defined?(@first_body_page)

      @first_body_page = document.at('structMap[@TYPE="logical"]//div[@TYPE="body"]/div[1]/div[1]')
    end

    # @param filename [String] key of the pagedata entry
    # @param orderlabel [String] page number; empty when the page has none
    # @param label [String, nil] structural label, if any
    # @return [String, nil] one newline-terminated pagedata line, or nil when
    #   there is neither a page number nor a label to record
    def format_page_line(filename, orderlabel, label = nil)
      fields = []
      fields << "orderlabel: \"#{orderlabel}\"" unless orderlabel.empty?
      fields << "label: \"#{label}\"" if label
      return nil if fields.empty?

      "#{filename}: { #{fields.join(', ')} }\n"
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'mets_converter/logging'
require 'mets_converter/mets_parser'
require 'mets_converter/version'
require 'mets_converter/yaml_builder'
|
4
|
+
|
5
|
+
module MetsConverter #:nodoc:
  class << self
    # Convenience reader: returns the logger held by MetsConverter::Logging.
    def logger
      MetsConverter::Logging.logger
    end

    # Convenience writer: hands +log+ to MetsConverter::Logging.logger=.
    def logger=(log)
      MetsConverter::Logging.logger = log
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'mets_converter/version'
|
5
|
+
|
6
|
+
# Gem metadata, packaged files and dependencies for mets_converter.
Gem::Specification.new do |spec|
  spec.name = 'mets_converter'
  spec.version = MetsConverter::VERSION
  spec.authors = ['Brendan Quinn']
  spec.email = ['brendan-quinn@northwestern.edu']

  spec.summary = 'Converts METS schema encoded XML to YML'
  spec.description = 'Converts METS schema encoded XML to YML'
  spec.homepage = 'https://github.com/nulib/mets_converter'
  # The bundled LICENSE file is the Apache License, Version 2.0 notice.
  spec.license = 'Apache-2.0'

  # NUL-separated listing so tracked paths containing whitespace or newlines
  # are split correctly (and without relying on the $/ global).
  spec.files = `git ls-files -z`.split("\x0")
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.executables = ['mets_to_yaml']
  spec.require_paths = ['lib']

  spec.add_dependency 'nokogiri', '>= 1.10.4'

  spec.add_development_dependency 'bundler', '~> 2.0'
  spec.add_development_dependency 'pry'
  spec.add_development_dependency 'rake', '~> 13.0'
  spec.add_development_dependency 'rspec', '~> 3.9'
end
|