libis-metadata 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/.travis.yml +5 -0
  5. data/Gemfile +6 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +39 -0
  8. data/Rakefile +6 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/exe/metadata +5 -0
  12. data/lib/libis/metadata/cli/cli_downloader.rb +182 -0
  13. data/lib/libis/metadata/cli/cli_helper.rb +74 -0
  14. data/lib/libis/metadata/command_line.rb +25 -0
  15. data/lib/libis/metadata/downloader.rb +117 -0
  16. data/lib/libis/metadata/dublin_core_record.rb +115 -0
  17. data/lib/libis/metadata/field_format.rb +119 -0
  18. data/lib/libis/metadata/fix_field.rb +33 -0
  19. data/lib/libis/metadata/mapper.rb +80 -0
  20. data/lib/libis/metadata/mappers/flandrica.rb +76 -0
  21. data/lib/libis/metadata/mappers/kuleuven.rb +1929 -0
  22. data/lib/libis/metadata/mappers/scope.rb +46 -0
  23. data/lib/libis/metadata/marc21_record.rb +49 -0
  24. data/lib/libis/metadata/marc_record.rb +285 -0
  25. data/lib/libis/metadata/parser/basic_parser.rb +116 -0
  26. data/lib/libis/metadata/parser/dublin_core_parser.rb +35 -0
  27. data/lib/libis/metadata/parser/marc21_parser.rb +205 -0
  28. data/lib/libis/metadata/parser/marc_format_parser.rb +51 -0
  29. data/lib/libis/metadata/parser/marc_rules.rb +34 -0
  30. data/lib/libis/metadata/parser/marc_select_parser.rb +24 -0
  31. data/lib/libis/metadata/parser/patch.rb +22 -0
  32. data/lib/libis/metadata/parser/subfield_criteria_parser.rb +70 -0
  33. data/lib/libis/metadata/parsers.rb +12 -0
  34. data/lib/libis/metadata/sharepoint_mapping.rb +119 -0
  35. data/lib/libis/metadata/sharepoint_record.rb +262 -0
  36. data/lib/libis/metadata/var_field.rb +242 -0
  37. data/lib/libis/metadata/version.rb +5 -0
  38. data/lib/libis/metadata.rb +25 -0
  39. data/lib/libis-metadata.rb +1 -0
  40. data/metadata.gemspec +39 -0
  41. metadata +266 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 23ff1ec087acd8448cd8230000f6d5b3aa5f7acf
4
+ data.tar.gz: a908b2af8489af83de9284e529abdfd4e7b68f08
5
+ SHA512:
6
+ metadata.gz: aa677a22babb3b23d1066901db39d6b0d0a40832bdc6d8a28aa14cd0ff21e616ca4ad4c67adf990c7a5a4dcb00068b9c3345e072fce28e42eb7335db57ed8ace
7
+ data.tar.gz: 3223dace5cc94e68359694f2dfaf50640b55af25a156b083bc0ca793abb403285b5fd77df4eee8553d241207be624504a22562da42bcd71204567c153ee2b553
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.5.0
5
+ before_install: gem install bundler -v 1.16.1
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in metadata.gemspec
6
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Kris Dekeyser
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,39 @@
1
+ # Metadata
2
+
3
+ Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/metadata`. To experiment with that code, run `bin/console` for an interactive prompt.
4
+
5
+ TODO: Delete this and the text above, and describe your gem
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'libis-metadata'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install libis-metadata
22
+
23
+ ## Usage
24
+
25
+ TODO: Write usage instructions here
26
+
27
+ ## Development
28
+
29
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
30
+
31
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32
+
33
+ ## Contributing
34
+
35
+ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/metadata.
36
+
37
+ ## License
38
+
39
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "libis-metadata"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/exe/metadata ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'libis/metadata'
4
+
5
+ Libis::Metadata::CommandLine.start(ARGV)
@@ -0,0 +1,182 @@
1
+ require 'libis/tools/spreadsheet'
2
+ require 'awesome_print'
3
+
4
+ module Libis
5
+ module Metadata
6
+ module Cli
7
+ module Downloader
8
+
9
+ # noinspection RubyStringKeysInHashInspection
10
+ VALID_SOURCES = {
11
+ 'alma' => 'alma.mms_id',
12
+ 'scope' => 'ID'
13
+ }
14
+
15
+ REQ_HEADERS = {term: 'Term'}
16
+ OPT_HEADERS = {pid: 'Pid', filename: 'File'}
17
+
18
# Hook invoked when this module is mixed into a class. It declares the
# `download` command on the including class: usage line, long description
# and all command-line options.
#
# The including class must respond to the class-level macros `desc`,
# `long_desc` and `method_option` (presumably a Thor subclass - confirm
# against the class that includes this module).
#
# @param klass [Class] the class that includes this module
def self.included(klass)
  klass.class_exec do
    desc 'download [options] [TERM[:PID] ...]', 'Download metadata from Alma or Scope'
    long_desc <<-DESC

      'download [TERM ...]' will download metadata from Alma or Scope and convert it to Dublin Core.

      The output format can either be Dublin Core or a Rosetta MD Update file. If you supply a Rosetta IE PID the
      tool will generate a Rosetta MD Update file, a Dublin Core XML file if you don't. Note that there is no
      check if the IE PID is a valid one.

      Any TERM argument that starts with a '@' will be interpreted as an input file name. The input file name can
      be:

      * a simple text file. File extension should be '.txt' and each non-empty line is interpreted as if it was
      supplied as a TERM argument on the command line.

      * a comma-delimited file (CSV). File extension should be '.csv'.

      * a tab-delimited file (TSV). File extension should be '.tsv'.

      * a spreadsheet. Excel files (.xls or .xlsx) and OpenOffice/LibreOffice Calc files (.ods) are supported.
      '@<sheet_name>' must be appended to the file name to select the proper sheet tab.

      For the CSV, TSV and spreadsheets: if there is no header row, the first column should contain the search
      terms. If present, the second column should contain PID info and the third column FILE info. Other columns
      are ignored. If there is a header row, it should contain at least a cell with the text 'Term'. That column
      is expected to contain the search terms. If a column header with the text 'Pid' is found, the column data
      will be expected to contain pids for the IE's to modify. If a column header with the text 'File' is found,
      the column data will be expected to contain file names to save to.

      In any case, if the output file info is missing, the name defaults to the PID (if present) or the search
      term. If the FILE info does not have a file extension, '.xml' will be added.

      The list of TERM arguments will be processed automatically. If there are no terms supplied on the command
      line, the program will ask for them until you supply an empty value. A TERM argument can contain PID and
      FILE info separated by a ':' or whatever you supply for the separator option. TERM arguments supplied via
      a simple text file are interpreted the same way.

      Examples:
      * abc => searches for 'abc' and save DC metadata in abc.xml
      * abc:123 => searches for 'abc' and generates MD Update in 123.xml
      * abc:123:xyz.data => searches for 'abc' and generates MD Update in xyz.data
      * abc::xyz => searches for 'abc' and save DC metadata in xyz.xml

      For any option that is not supplied on the command line and doesn't have a default value, the tool will
      always ask you to supply a value, even if the '-q' option is given.

    DESC

    method_option :quiet, aliases: '-q', desc: 'Do not ask for options that have a default value',
                  type: :boolean, default: false
    method_option :metadata, aliases: '-m', banner: 'source', desc: 'Metadata source system',
                  default: VALID_SOURCES.keys.first, enum: VALID_SOURCES.keys
    method_option :field, aliases: '-f', banner: 'field_name', desc: 'Search field in the Metadata system;' +
        " default value depends on selected metadata source system: #{VALID_SOURCES}"
    method_option :library, aliases: '-l', banner: 'library_code', desc: 'Library code for Alma',
                  default: '32KUL_KUL'
    method_option :database, aliases: '-d', desc: 'Scope database to connect to'
    method_option :user, aliases: '-u', desc: 'Database user name'
    method_option :password, aliases: '-p', desc: 'Database password'
    method_option :target_dir, aliases: '-t', desc: 'Directory where files will be created', default: '.'
    method_option :separator, aliases: '-s', desc: 'Separator for the TERM arguments', default: ':'
  end
end
84
+
85
# Command body for `download`: fetches metadata for every search term and
# writes one output file per term.
#
# Terms come from one of three places:
# * no arguments: the user is prompted repeatedly until an empty term is given;
# * a plain argument: split into term/pid/filename by #split_term;
# * an argument starting with '@': read from the named file. '.txt' files are
#   read line by line; every other file is handed to Libis::Tools::Spreadsheet
#   (an optional '@<sheet>' suffix selects the sheet).
#
# Exits the process with status -1 when the target directory does not exist.
#
# @param terms [Array<String>] TERM arguments from the command line
def download(*terms)
  unless Dir.exist?(options.target_dir)
    prompt.error "ERROR: target directory '#{options.target_dir}' does not exist"
    exit(-1)
  end

  md = Libis::Metadata::Downloader.new
  config = download_configure
  md.configure config

  if terms.empty?
    # Interactive mode: an empty answer (nil) ends the loop
    while (term = prompt.ask "Search #{config[:metadata]} for #{config[:field]}:")
      pid = prompt.ask 'IE PID to update:'
      filename = prompt.ask('File name:', default: "#{pid || term}.xml")
      download_one(service: md, term: term, pid: pid, filename: filename)
    end
  else
    terms.each do |term|
      source = term.match(/^@((.+)@(.+)|(.+))$/)

      unless source
        x = split_term(term)
        download_one(service: md, term: x[:term], pid: x[:pid], filename: x[:filename])
        next
      end

      # '@file@sheet' fills groups 2 and 3, a bare '@file' fills group 4
      sheet = source[3]
      file = source[2] || source[4]

      unless File.exist? file
        prompt.warn "WARNING: File name '#{file}' not found."
        next
      end

      if File.extname(file) == '.txt'
        # File.foreach closes the file when done; a bare File.open would leak the handle
        File.foreach(file) do |line|
          line = line.strip
          next if line.empty?
          x = split_term(line)
          download_one(service: md, term: x[:term], pid: x[:pid], filename: x[:filename])
        end
      else
        opts = {required: REQ_HEADERS, optional: OPT_HEADERS, noheaders: REQ_HEADERS.merge(OPT_HEADERS)}
        opts.merge!(col_sep: "\t", extension: :csv) if File.extname(file) == '.tsv'
        Libis::Tools::Spreadsheet.foreach("#{file}#{sheet ? '|' + sheet : ''}", opts) do |row|
          # Skip empty rows and a header row that was read as data
          next if row[:term].nil? || row[:term] == 'Term'
          # NOTE(review): dumps every row to stdout; looks like leftover debug output - confirm before removing
          ap row
          download_one(service: md, term: row[:term], pid: row[:pid], filename: row[:filename])
        end
      end
    end
  end
end
136
+
137
+ protected
138
+
139
# Breaks a TERM argument into its components using the configured
# separator option.
#
# @param term [String] text of the form TERM[<sep>PID[<sep>FILE]]
# @return [Hash] always has :term; :pid and :filename only when non-empty
def split_term(term)
  term_part, pid_part, file_part = term.split(options.separator)
  parts = {term: term_part}
  parts[:pid] = pid_part unless pid_part.nil? || pid_part.empty?
  parts[:filename] = file_part unless file_part.nil? || file_part.empty?
  parts
end
147
+
148
# Downloads the metadata for a single term and reports the outcome on the
# prompt. Failures are reported, not raised, so a batch run continues with
# the next term.
#
# @param service [#download] object performing the actual download
# @param term [String] search term
# @param pid [String, nil] IE PID; passed through to the service
# @param filename [String, nil] output file name; defaults to "<pid or term>.xml",
#   and '.xml' is appended when the name has no extension
def download_one(service:, term:, pid: nil, filename: nil)
  # A blank name (e.g. an empty spreadsheet cell) is treated as "not given"
  filename = nil if filename.to_s.strip.empty?
  filename ||= "#{pid || term}.xml"
  filename += '.xml' if File.extname(filename).empty?
  service.download(term, filename, pid)
  prompt.ok "OK: #{filename} [#{term}]"
rescue StandardError => e
  # Only StandardError: Interrupt (Ctrl-C) and SystemExit must still stop the run
  prompt.error e.message.strip
end
156
+
157
# Assembles the configuration hash for the downloader.
#
# In quiet mode the command-line options are the starting point, so `ask`
# is only consulted for values that are still missing; otherwise the hash
# starts empty and every applicable setting goes through `ask`.
#
# @return [Hash] settings keyed by symbol (:metadata, :field, :target_dir
#   plus the source-specific entries)
def download_configure
  quiet = options.quiet?
  settings = quiet ? options.key_strings_to_symbols : {}

  ask 'Metadata source:', settings, :metadata, default: options.metadata, enum: VALID_SOURCES.keys

  # The default search field depends on the source that was just chosen
  settings[:field] ||= VALID_SOURCES[settings[:metadata]] if quiet
  ask 'Search field:', settings, :field, default: options.field || VALID_SOURCES[settings[:metadata]]

  case settings[:metadata]
  when 'alma'
    ask 'Library code:', settings, :library, default: options.library
  when 'scope'
    ask 'Database name:', settings, :database, default: options.database
    ask 'User name:', settings, :user, default: options.user
    ask 'Password:', settings, :password, default: options.password, mask: true
  end

  settings[:target_dir] ||= tree_select(options.target_dir, question: 'Target directory:')

  settings
end
178
+
179
+ end
180
+ end
181
+ end
182
+ end
@@ -0,0 +1,74 @@
1
+ module Libis
2
+ module Metadata
3
+ module Cli
4
+ module Helper
5
+
6
# Class-level behaviour added to every class that includes the helper.
module ClassMethods
  # Always answers true.
  # NOTE(review): presumably the hook Thor consults to decide whether a
  # failed command ends the process with an error status - confirm.
  #
  # @return [true]
  def exit_on_failure?
    true
  end
end
13
+
14
# Inclusion hook: gives the including class the methods of ClassMethods
# as class-level methods.
#
# @param base [Class] the class that includes this module
def self.included(base)
  base.extend ClassMethods
end
17
+
18
+ attr_reader :prompt
19
+
20
# Creates the interactive prompt before handing all arguments on,
# unchanged, to the next initializer in the ancestor chain.
#
# @param args [Array] constructor arguments, forwarded as-is by the bare `super`
def initialize(*args)
  @prompt = TTY::Prompt.new
  super
end
24
+
25
+ private
26
+
27
# Looks up the 1-based position of a value in a list.
#
# @param list [Array] list to search
# @param value [Object] element to look for
# @return [Integer, nil] position counted from 1, or nil when absent
def index_of(list, value)
  position = list.index(value)
  position.nil? ? nil : position + 1
end
32
+
33
# Fills config[field] by asking the user, unless it already holds a value.
#
# * With `enum`, the question is a selection from that list; a current
#   value that is not in the list is discarded so the question is asked.
# * With `mask`, the answer is entered through the prompt's masked input
#   (this takes precedence over `enum`).
#
# @param question [String] text shown to the user
# @param config [Hash] hash that receives the answer
# @param field [Symbol] key in config to fill
# @param enum [Array, nil] allowed values
# @param default [Object, nil] default answer
# @param mask [Boolean] hide the typed answer
# @return [Object, nil] the answer when the user was asked, nil otherwise
def ask(question, config, field, enum: nil, default: nil, mask: false)
  cmd, args, opts = :ask, [question], {}
  if enum
    cmd = :select
    args << enum
    # The selection expects the default as a 1-based position in the list
    default = index_of(enum, default)
    # Force the question if the supplied value is not valid
    config[field] = nil unless enum.include? config[field]
  end
  cmd = :mask if mask
  opts[:default] = default if default
  # Options go as keywords: a positional hash is no longer converted to
  # keyword arguments on Ruby 3
  config[field] = prompt.public_send(cmd, *args, **opts) if config[field].nil?
end
47
+
48
# Lets the user browse the file system from `path` and pick a directory
# (or, with `file: true`, a file). Choosing a sub-directory or '[..]'
# descends into it and asks again; choosing the current directory, or a
# file, ends the selection.
#
# @param path [String, Pathname] directory to start in
# @param question [String, nil] prompt text; a default is generated when nil
# @param file [Boolean] also list files and make them selectable
# @param page_size [Integer] number of entries shown per page
# @param filter [Boolean] passed to the prompt as its :filter option
# @param cycle [Boolean] passed to the prompt as its :cycle option
# @return [String] path of the chosen directory or file
def tree_select(path, question: nil, file: false, page_size: 22, filter: true, cycle: false)
  path = Pathname.new(path) unless path.is_a? Pathname
  path = path.realpath

  entries = path.children
  dirs = entries.select(&:directory?).sort
  files = file ? entries.select(&:file?).sort : []

  choices = []
  # When picking a file the current directory is listed but not selectable
  choices << {name: path.to_s, value: path, disabled: file ? '' : false}
  choices << {name: '[..]', value: path.parent}
  dirs.each { |d| choices << {name: "[#{d.basename}]", value: d} }
  files.each { |f| choices << {name: f.basename.to_path, value: f} }

  # Test the `file` flag: the `files` array is truthy even when empty
  question ||= "Select #{'file or ' if file}directory"
  selection = prompt.select question, choices,
                            per_page: page_size, filter: filter, cycle: cycle, default: file ? 2 : 1

  return selection.to_path if selection == path || selection.file?

  # Keep the same question while descending
  tree_select selection, question: question, file: file, page_size: page_size, filter: filter, cycle: cycle
end
70
+
71
+ end
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,25 @@
1
+ require 'thor'
2
+ require 'tty-prompt'
3
+
4
+ require 'libis/metadata/downloader'
5
+ require 'libis/tools/extend/hash'
6
+
7
+ require 'libis/metadata/cli/cli_helper'
8
+ require 'libis/metadata/cli/cli_downloader'
9
+
10
+ module Libis
11
+ module Metadata
12
+
13
# Command-line entry point of the gem: a Thor application that combines
# the prompt helpers with the `download` command.
class CommandLine < Thor
  include Cli::Helper
  include Cli::Downloader

  # Hands over to the `download` implementation mixed in from Cli::Downloader.
  # NOTE(review): presumably redefined here so that Thor registers the
  # command on this class - confirm before removing.
  #
  # @param terms [Array<String>] TERM arguments from the command line
  def download(*terms)
    super
  end
end
23
+
24
+ end
25
+ end
@@ -0,0 +1,117 @@
1
+ require 'uri'
2
+ require 'pathname'
3
+ require 'awesome_print'
4
+
5
+ require 'libis/services/alma/sru_service'
6
+ require 'libis/services/scope/search'
7
+
8
+ require 'libis/tools/metadata/marc21_record'
9
+ require 'libis/tools/metadata/dublin_core_record'
10
+ require 'libis/tools/xml_document'
11
+ require 'libis/tools/extend/string'
12
+
13
+ require 'libis/tools/metadata/mappers/kuleuven'
14
+ require 'libis/tools/metadata/mappers/scope'
15
+
16
+ module Libis
17
+ module Metadata
18
+
19
+ class Downloader
20
+ attr_reader :service, :mapper_class, :config
21
+
22
# Creates an unconfigured downloader: service, target directory,
# configuration and mapper class all start out as nil.
def initialize
  @service = @target_dir = @config = @mapper_class = nil
end
28
+
29
# Selects and sets up the metadata service and the matching mapper.
#
# @param config [Hash] settings; reads :metadata ('alma' or 'scope'),
#   :target_dir and, for 'scope', :user, :password and :database
# @return [Hash] the configuration that was stored
# @raise [RuntimeError] when the source is unknown or the setup fails
def configure(config)
  metadata = config[:metadata]
  case metadata
  when 'alma'
    # Reuse an existing SRU client, but never keep a client of another
    # kind left over from an earlier configuration
    unless @service.is_a?(Libis::Services::Alma::SruService)
      @service = Libis::Services::Alma::SruService.new
    end
    @mapper_class = Libis::Tools::Metadata::Mappers::Kuleuven
  when 'scope'
    @service = ::Libis::Services::Scope::Search.new
    @mapper_class = Libis::Tools::Metadata::Mappers::Scope
    # NOTE(review): assumes connect takes (user, password, database) - confirm against libis-services
    @service.connect(config[:user], config[:password], config[:database])
  else
    raise RuntimeError, "Service '#{metadata}' unknown"
  end
  @target_dir = config[:target_dir]
  @config = config
rescue StandardError => e
  raise RuntimeError, "failed to configure metadata service: #{e.message} @ #{e.backtrace&.first}"
end
47
+
48
# Searches for a term and stores the resulting record in the target
# directory. When a pid is given, the record is first wrapped by
# #md_update_xml and that document is saved instead.
#
# @param term [String] search term
# @param filename [String] file name, relative to the target directory
# @param pid [String, nil] optional PID for the update document
# @return [String, nil] the file name, or nil when the search gave nothing
def download(term, filename, pid = nil)
  found = search(term)
  return unless found

  output = pid ? md_update_xml(pid, found) : found
  output.save(File.join(@target_dir, filename))

  filename
end
58
+
59
# Looks up a single record for the term in the configured service and
# converts it to Dublin Core through the configured mapper.
#
# @param term [String] value to search for in the configured field
# @return [Libis::Tools::Metadata::DublinCoreRecord]
# @raise [RuntimeError] when the service is unknown, no record or more than
#   one record is found, or the request itself fails
def search(term)
  record = case service
           when ::Libis::Services::Alma::SruService
             # Same escaping the removed URI.encode performed; that method no longer exists on Ruby 3
             query = URI::RFC2396_Parser.new.escape("\"#{term}\"")
             result = service.search(config[:field], query, config[:library])
             raise RuntimeError, "Multiple records found for #{config[:field]}=#{term}" if result.size > 1
             result.empty? ? nil : ::Libis::Tools::Metadata::Marc21Record.new(result.first.root)

           when ::Libis::Services::Scope::Search
             service.query(term, type: config[:field])
             service.next_record do |doc|
               ::Libis::Tools::Metadata::DublinCoreRecord.new(doc.to_xml)
             end

           else
             raise RuntimeError, "Service '#{service}' unknown"

           end

  unless record
    raise RuntimeError, "No record found for #{config[:field]} = '#{term}'"
  end

  # The mapper module supplies the to_dc conversion for this record type
  record.extend mapper_class
  record.to_dc

rescue StandardError => e
  raise RuntimeError, "Search request failed: #{e.message}"
end
88
+
89
# Writes a record to a file inside the target directory.
#
# @param record [#save, nil] record to store
# @param filename [String] file name, relative to the target directory
# @return [Boolean] true when the record was saved, false when there was none
def save(record, filename)
  if record
    record.save(File.join(@target_dir, filename))
    true
  else
    false
  end
end
96
+
97
+ NO_DECL = Nokogiri::XML::Node::SaveOptions::FORMAT + Nokogiri::XML::Node::SaveOptions::NO_DECLARATION
98
+
99
# Wraps a record in an <updateMD> document for the given PID. The record's
# XML (serialized with the NO_DECL save options) is embedded as CDATA.
#
# @param pid [String] PID to put in the <PID> element
# @param record [#document] record whose document responds to to_xml
# @return [Object] whatever Libis::Tools::XmlDocument.parse returns for the document
def md_update_xml(pid, record)
  # Escape the PID: a '&' or '<' in user-supplied input would break the document
  safe_pid = pid.to_s.encode(xml: :text)
  # A literal ']]>' would end the CDATA section early; split it over two sections
  dc_xml = record.document.to_xml(save_with: NO_DECL).gsub(']]>', ']]]]><![CDATA[>')

  Libis::Tools::XmlDocument.parse <<~EO_XML
    <updateMD xmlns="http://com/exlibris/digitool/repository/api/xmlbeans">
      <PID>#{safe_pid}</PID>
      <metadata>
        <type>descriptive</type>
        <subType>dc</subType>
        <content>
          <![CDATA[#{dc_xml}]]>
        </content>
      </metadata>
    </updateMD>
  EO_XML
end
113
+
114
+ end
115
+
116
+ end
117
+ end