libis-metadata 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/.travis.yml +5 -0
  5. data/Gemfile +6 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +39 -0
  8. data/Rakefile +6 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/exe/metadata +5 -0
  12. data/lib/libis/metadata/cli/cli_downloader.rb +182 -0
  13. data/lib/libis/metadata/cli/cli_helper.rb +74 -0
  14. data/lib/libis/metadata/command_line.rb +25 -0
  15. data/lib/libis/metadata/downloader.rb +117 -0
  16. data/lib/libis/metadata/dublin_core_record.rb +115 -0
  17. data/lib/libis/metadata/field_format.rb +119 -0
  18. data/lib/libis/metadata/fix_field.rb +33 -0
  19. data/lib/libis/metadata/mapper.rb +80 -0
  20. data/lib/libis/metadata/mappers/flandrica.rb +76 -0
  21. data/lib/libis/metadata/mappers/kuleuven.rb +1929 -0
  22. data/lib/libis/metadata/mappers/scope.rb +46 -0
  23. data/lib/libis/metadata/marc21_record.rb +49 -0
  24. data/lib/libis/metadata/marc_record.rb +285 -0
  25. data/lib/libis/metadata/parser/basic_parser.rb +116 -0
  26. data/lib/libis/metadata/parser/dublin_core_parser.rb +35 -0
  27. data/lib/libis/metadata/parser/marc21_parser.rb +205 -0
  28. data/lib/libis/metadata/parser/marc_format_parser.rb +51 -0
  29. data/lib/libis/metadata/parser/marc_rules.rb +34 -0
  30. data/lib/libis/metadata/parser/marc_select_parser.rb +24 -0
  31. data/lib/libis/metadata/parser/patch.rb +22 -0
  32. data/lib/libis/metadata/parser/subfield_criteria_parser.rb +70 -0
  33. data/lib/libis/metadata/parsers.rb +12 -0
  34. data/lib/libis/metadata/sharepoint_mapping.rb +119 -0
  35. data/lib/libis/metadata/sharepoint_record.rb +262 -0
  36. data/lib/libis/metadata/var_field.rb +242 -0
  37. data/lib/libis/metadata/version.rb +5 -0
  38. data/lib/libis/metadata.rb +25 -0
  39. data/lib/libis-metadata.rb +1 -0
  40. data/metadata.gemspec +39 -0
  41. metadata +266 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 23ff1ec087acd8448cd8230000f6d5b3aa5f7acf
4
+ data.tar.gz: a908b2af8489af83de9284e529abdfd4e7b68f08
5
+ SHA512:
6
+ metadata.gz: aa677a22babb3b23d1066901db39d6b0d0a40832bdc6d8a28aa14cd0ff21e616ca4ad4c67adf990c7a5a4dcb00068b9c3345e072fce28e42eb7335db57ed8ace
7
+ data.tar.gz: 3223dace5cc94e68359694f2dfaf50640b55af25a156b083bc0ca793abb403285b5fd77df4eee8553d241207be624504a22562da42bcd71204567c153ee2b553
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.travis.yml ADDED
@@ -0,0 +1,5 @@
1
+ sudo: false
2
+ language: ruby
3
+ rvm:
4
+ - 2.5.0
5
+ before_install: gem install bundler -v 1.16.1
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in metadata.gemspec
6
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2018 Kris Dekeyser
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,39 @@
1
+ # Metadata
2
+
3
+ Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/metadata`. To experiment with that code, run `bin/console` for an interactive prompt.
4
+
5
+ TODO: Delete this and the text above, and describe your gem
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'libis-metadata'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install libis-metadata
22
+
23
+ ## Usage
24
+
25
+ TODO: Write usage instructions here
26
+
27
+ ## Development
28
+
29
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
30
+
31
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32
+
33
+ ## Contributing
34
+
35
+ Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/metadata.
36
+
37
+ ## License
38
+
39
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
3
+
4
+ RSpec::Core::RakeTask.new(:spec)
5
+
6
+ task :default => :spec
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "libis-metadata"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
data/exe/metadata ADDED
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'libis/metadata'
4
+
5
+ Libis::Metadata::CommandLine.start(ARGV)
@@ -0,0 +1,182 @@
1
+ require 'libis/tools/spreadsheet'
2
+ require 'awesome_print'
3
+
4
module Libis
  module Metadata
    module Cli
      # Mixin that adds a +download+ command to a Thor-style command-line class.
      #
      # The including class must provide +options+ (the parsed command options),
      # +prompt+ (an object responding to +ask+, +ok+, +warn+ and +error+) and the
      # +ask+ / +tree_select+ helpers used by #download_configure.
      module Downloader

        # Supported metadata sources, each mapped to its default search field.
        # noinspection RubyStringKeysInHashInspection
        VALID_SOURCES = {
            'alma' => 'alma.mms_id',
            'scope' => 'ID'
        }

        # Column headers recognised in CSV/TSV/spreadsheet input files.
        REQ_HEADERS = {term: 'Term'}
        OPT_HEADERS = {pid: 'Pid', filename: 'File'}

        # Matches an input-file argument: '@<file>' or '@<file>@<sheet>'.
        # Group 1/2 hold file and sheet when a sheet is given, group 3 the file otherwise.
        INPUT_FILE_PATTERN = /\A@(?:(.+)@(.+)|(.+))\z/

        # Declares the command description and options on the including class.
        #
        # @param klass [Class] class that must respond to +desc+, +long_desc+ and +method_option+
        def self.included(klass)
          klass.class_exec do
            desc 'download [options] [TERM[:PID] ...]', 'Download metadata from Alma or Scope'
            long_desc <<-DESC

              'download [TERM ...]' will download metadata from Alma or Scope and convert it to Dublin Core.

              The output format can either be Dublin Core or a Rosetta MD Update file. If you supply a Rosetta IE PID the
              tool will generate a Rosetta MD Update file, a Dublin Core XML file if you don't. Note that there is no
              check if the IE PID is a valid one.

              Any TERM argument that starts with a '@' will be interpreted as an input file name. The input file name can
              be:

              * a simple text file. File extension should be '.txt' and each non-empty line is interpreted as if it was
              supplied as a TERM argument on the command line.

              * a comma-delimited file (CSV). File extension should be '.csv'.

              * a tab-delimited file (TSV). File extension should be '.tsv'.

              * a spreadsheet. Excel files (.xls or xlsx) and OpenOffice/LibreOffice Calc files (.ods) are supported.
              '@<sheet_name>' must be appended to the file name to select the proper sheet tab.

              For the CSV, TSV and spreadsheets: if there is no header row, the first column should contain the search
              terms. If present, the second column should contain PID info and the third column FILE info. Other columns
              are ignored. If there is a header row, it should contain at least a cell with the text 'Term'. That column
              is expected to contain the search terms. If a column header with the text 'Pid' is found, the column data
              will be expected to contain pids for the IE's to modify. If a column header with the text 'File' is found,
              the column data will be expected to contain file names to save to.

              In any case, if the output file info is missing, the name defaults to the PID (if present) or the search
              term. If the FILE info does not have a file extension, '.xml' will be added.

              The list of TERM arguments will be processed automatically. If there are no terms supplied on the command
              line, the program will ask for them until you supply an empty value. A TERM argument can contain PID and
              FILE info separated by a ':' or whatever you supply for the separator option. TERM arguments supplied via
              a simple text file are interpreted the same way.

              Examples:
              * abc => searches for 'abc' and save DC metadata in abc.xml
              * abc:123 => searches for 'abc' and generates MD Update in 123.xml
              * abc:123:xyz.data => searches for 'abc' and generates MD Update in xyz.data
              * abc::xyz => searches for 'abc' and save DC metadata in xyz.xml

              For any option that is not supplied on the command line and doesn't have a default value, the tool will
              always ask you to supply a value, even if the '-q' option is given.

            DESC

            method_option :quiet, aliases: '-q', desc: 'Do not ask for options that have a default value',
                          type: :boolean, default: false
            method_option :metadata, aliases: '-m', banner: 'source', desc: 'Metadata source system',
                          default: VALID_SOURCES.keys.first, enum: VALID_SOURCES.keys
            method_option :field, aliases: '-f', banner: 'field_name', desc: 'Search field in the Metadata system;' +
                " default value depends on selected metadata source system: #{VALID_SOURCES}"
            method_option :library, aliases: '-l', banner: 'library_code', desc: 'Library code for Alma',
                          default: '32KUL_KUL'
            method_option :database, aliases: '-d', desc: 'Scope database to connect to'
            method_option :user, aliases: '-u', desc: 'Database user name'
            method_option :password, aliases: '-p', desc: 'Database password'
            method_option :target_dir, aliases: '-t', desc: 'Directory where files will be created', default: '.'
            method_option :separator, aliases: '-s', desc: 'Separator for the TERM arguments', default: ':'

          end
        end

        # Downloads metadata for every given term.
        #
        # A term of the form '@file' or '@file@sheet' is read as an input file;
        # any other term is split with the separator option into term, PID and
        # file name. Without terms the user is prompted until an empty term is
        # entered. Exits with status -1 when the target directory does not exist.
        #
        # @param terms [Array<String>] TERM arguments from the command line
        def download(*terms)
          unless Dir.exist?(options.target_dir)
            prompt.error "ERROR: target directory '#{options.target_dir}' does not exist"
            exit(-1)
          end

          md = Libis::Metadata::Downloader.new
          config = download_configure
          md.configure config

          if terms.empty?
            download_interactive(md, config)
          else
            terms.each do |term|
              input = INPUT_FILE_PATTERN.match(term)
              if input
                download_from_file(md, input[1] || input[3], input[2])
              else
                download_term(md, term)
              end
            end
          end
        end

        protected

        # Keeps asking for a term, PID and file name until no term is entered.
        def download_interactive(service, config)
          while (term = prompt.ask "Search #{config[:metadata]} for #{config[:field]}:")
            pid = prompt.ask 'IE PID to update:'
            filename = prompt.ask('File name:', default: "#{pid || term}.xml")
            download_one(service: service, term: term, pid: pid, filename: filename)
          end
        end

        # Processes one input file; a missing file only produces a warning.
        #
        # '.txt' files are read line by line, every other extension is handed to
        # Libis::Tools::Spreadsheet together with the optional sheet name.
        def download_from_file(service, file, sheet = nil)
          unless File.exist? file
            prompt.warn "WARNING: File name '#{file}' not found."
            return
          end

          if File.extname(file) == '.txt'
            # File.foreach closes the file when iteration ends
            File.foreach(file) do |line|
              line = line.strip
              next if line.empty?
              download_term(service, line)
            end
          else
            opts = {required: REQ_HEADERS, optional: OPT_HEADERS, noheaders: REQ_HEADERS.merge(OPT_HEADERS)}
            opts.merge!(col_sep: "\t", extension: :csv) if File.extname(file) == '.tsv'
            Libis::Tools::Spreadsheet.foreach("#{file}#{sheet ? '|' + sheet : ''}", opts) do |row|
              # skip rows without a term and a header row that was read as data
              next if row[:term].nil? || row[:term] == 'Term'
              download_one(service: service, term: row[:term], pid: row[:pid], filename: row[:filename])
            end
          end
        end

        # Splits a TERM argument and downloads the record it describes.
        def download_term(service, term)
          parts = split_term(term)
          download_one(service: service, term: parts[:term], pid: parts[:pid], filename: parts[:filename])
        end

        # Splits 'term[<sep>pid[<sep>file]]' on the separator option.
        #
        # @param term [String]
        # @return [Hash] always has :term; :pid and :filename only when non-empty
        def split_term(term)
          result = {}
          t, p, f = term.split(options.separator)
          result[:term] = t
          result[:pid] = p if p && !p.empty?
          result[:filename] = f if f && !f.empty?
          result
        end

        # Downloads one record and reports the outcome through the prompt.
        #
        # The file name gets an '.xml' extension when it has none and defaults to
        # '<pid or term>.xml'. Errors are reported, not raised, so that the
        # remaining terms are still processed.
        def download_one(service:, term:, pid: nil, filename: nil)
          filename += '.xml' unless filename.nil? || !File.extname(filename).empty?
          filename ||= "#{pid || term}.xml"
          service.download(term, filename, pid)
          prompt.ok "OK: #{filename} [#{term}]"
        rescue StandardError => e
          prompt.error e.message.strip
        end

        # Builds the downloader configuration from the command options, asking
        # for every value that is still missing.
        #
        # @return [Hash] configuration passed to Libis::Metadata::Downloader#configure
        def download_configure
          result = options.quiet? ? options.key_strings_to_symbols : {}

          ask 'Metadata source:', result, :metadata, default: options.metadata, enum: VALID_SOURCES.keys
          # Set appropriate default for field depending on source selection
          result[:field] ||= VALID_SOURCES[result[:metadata]] if options.quiet?
          ask 'Search field:', result, :field, default: options.field || VALID_SOURCES[result[:metadata]]
          case result[:metadata]
          when 'alma'
            ask 'Library code:', result, :library, default: options.library
          when 'scope'
            ask 'Database name:', result, :database, default: options.database
            ask 'User name:', result, :user, default: options.user
            ask 'Password:', result, :password, default: options.password, mask: true
          else
            # Other source
          end
          result[:target_dir] = tree_select(options.target_dir, question: 'Target directory:') unless result[:target_dir]

          result
        end

      end
    end
  end
end
@@ -0,0 +1,74 @@
1
module Libis
  module Metadata
    module Cli
      # Shared helpers for command-line classes: creates the interactive prompt
      # and offers small wrappers for asking configuration values and browsing
      # the file system.
      module Helper

        # Class-level methods added to every class that includes Helper.
        module ClassMethods

          # @return [true]
          def exit_on_failure?
            true
          end

        end

        def self.included(base)
          base.extend(ClassMethods)
        end

        attr_reader :prompt

        # Creates the TTY prompt, then continues with the regular initialization.
        def initialize(*args)
          @prompt = TTY::Prompt.new
          super
        end

        private

        # @return [Integer, nil] 1-based position of +value+ in +list+, nil when absent
        def index_of(list, value)
          position = list.index(value)
          position && position + 1
        end

        # Stores an answer in +config[field]+ unless a value is already present.
        #
        # @param question [String] text shown to the user
        # @param config [Hash] configuration that is read and updated
        # @param field [Symbol] key in +config+
        # @param enum [Array, nil] allowed values; turns the question into a selection
        #   and discards a current value that is not in the list
        # @param default [Object, nil] default answer (for +enum+: one of its values)
        # @param mask [Boolean] ask with masked input
        def ask(question, config, field, enum: nil, default: nil, mask: false)
          cmd, args, opts = :ask, [question], {}
          if enum
            cmd = :select
            args << enum
            # Change default to its index in the enum
            default = index_of(enum, default)
            # Force the question if the supplied value is not valid
            config[field] = nil unless enum.include? config[field]
          end
          cmd = :mask if mask
          opts[:default] = default if default
          # options are forwarded as keywords so that prompt methods declaring
          # keyword parameters accept them
          config[field] = prompt.public_send(cmd, *args, **opts) if config[field].nil?
        end

        # Lets the user browse directories, starting at +path+, until the current
        # directory itself or (with +file+) a file is chosen.
        #
        # @param path [String, Pathname] directory to start in
        # @param question [String, nil] prompt text; a default text is used when nil
        # @param file [Boolean] also list files and make them selectable
        # @param page_size [Integer] passed to the prompt as +per_page+
        # @param filter [Boolean] passed to the prompt as +filter+
        # @param cycle [Boolean] passed to the prompt as +cycle+
        # @return [String] path of the selected directory or file
        def tree_select(path, question: nil, file: false, page_size: 22, filter: true, cycle: false)
          path = Pathname.new(path) unless path.is_a? Pathname
          path = path.realpath

          children = path.children
          dirs = children.select(&:directory?).sort
          files = file ? children.select(&:file?).sort : []

          choices = []
          # the current directory is listed first; it is disabled when a file has to be picked
          choices << {name: path.to_s, value: path, disabled: file ? '' : false}
          choices << {name: '[..]', value: path.parent}

          dirs.each { |d| choices << {name: "[#{d.basename}]", value: d} }
          files.each { |f| choices << {name: f.basename.to_path, value: f} }

          question ||= "Select #{'file or ' if file}directory"
          selection = prompt.select question, choices,
                                    per_page: page_size, filter: filter, cycle: cycle, default: file ? 2 : 1

          return selection.to_path if selection == path || selection.file?

          # descend (or go up) and ask again with the same question and settings
          tree_select selection, question: question, file: file, page_size: page_size, filter: filter, cycle: cycle
        end

      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ require 'thor'
2
+ require 'tty-prompt'
3
+
4
+ require 'libis/metadata/downloader'
5
+ require 'libis/tools/extend/hash'
6
+
7
+ require 'libis/metadata/cli/cli_helper'
8
+ require 'libis/metadata/cli/cli_downloader'
9
+
10
module Libis
  module Metadata

    # Thor application class of the gem; the command itself and the helper
    # methods come from the Cli::Downloader and Cli::Helper mixins.
    class CommandLine < Thor

      include Cli::Helper
      include Cli::Downloader

      # Delegates to the +download+ implementation of Cli::Downloader.
      # NOTE(review): presumably redefined here so that the command is declared
      # on this class itself rather than only inherited from the mixin - confirm
      # against Thor's command registration.
      #
      # @param terms [Array<String>] TERM arguments from the command line
      def download(*terms)
        super(*terms)
      end

    end

  end
end
@@ -0,0 +1,117 @@
1
+ require 'uri'
2
+ require 'pathname'
3
+ require 'awesome_print'
4
+
5
+ require 'libis/services/alma/sru_service'
6
+ require 'libis/services/scope/search'
7
+
8
+ require 'libis/tools/metadata/marc21_record'
9
+ require 'libis/tools/metadata/dublin_core_record'
10
+ require 'libis/tools/xml_document'
11
+ require 'libis/tools/extend/string'
12
+
13
+ require 'libis/tools/metadata/mappers/kuleuven'
14
+ require 'libis/tools/metadata/mappers/scope'
15
+
16
module Libis
  module Metadata

    # Searches a metadata source (Alma or Scope), converts the record that was
    # found to Dublin Core and saves it in the configured target directory,
    # optionally wrapped in a metadata update document for a given PID.
    class Downloader
      attr_reader :service, :mapper_class, :config

      def initialize
        @service = nil
        @target_dir = nil
        @config = nil
        @mapper_class = nil
      end

      # Selects the search service and mapper for the requested source.
      #
      # @param config [Hash] reads :metadata ('alma' or 'scope') and :target_dir;
      #   for 'scope' also :user, :password and :database; #search later reads
      #   :field and :library
      # @raise [RuntimeError] for an unknown source or when setting up the service fails
      def configure(config)
        metadata = config[:metadata]
        case metadata
        when 'alma'
          # reuse an existing Alma service, but never keep a service of another type
          unless @service.is_a?(Libis::Services::Alma::SruService)
            @service = Libis::Services::Alma::SruService.new
          end
          @mapper_class = Libis::Tools::Metadata::Mappers::Kuleuven
        when 'scope'
          @service = ::Libis::Services::Scope::Search.new
          @mapper_class = Libis::Tools::Metadata::Mappers::Scope
          # NOTE(review): argument order assumed to be (user, password, database) - confirm
          @service.connect(config[:user], config[:password], config[:database])
        else
          raise RuntimeError, "Service '#{metadata}' unknown"
        end
        @target_dir = config[:target_dir]
        @config = config
      rescue StandardError => e
        raise RuntimeError, "failed to configure metadata service: #{e.message} @ #{e.backtrace.first}"
      end

      # Searches for +term+ and saves the result as +filename+ in the target directory.
      #
      # @param term [String] value to search for
      # @param filename [String] file name, relative to the target directory
      # @param pid [String, nil] when given, the record is wrapped with #md_update_xml
      # @return [String, nil] +filename+, or nil when there is no record
      # @raise [RuntimeError] when the search fails
      def download(term, filename, pid = nil)
        record = search(term)
        return nil unless record

        record = md_update_xml(pid, record) if pid

        record.save File.join(@target_dir, filename)

        filename
      end

      # Looks up a single record and converts it to Dublin Core.
      #
      # @param term [String] value to search for in the configured field
      # @return [Libis::Tools::Metadata::DublinCoreRecord]
      # @raise [RuntimeError] when no record or more than one record is found, the
      #   service is unknown or the request fails
      def search(term)
        record = case service
                 when ::Libis::Services::Alma::SruService
                   # URI::DEFAULT_PARSER.escape replaces URI.encode, which newer Rubies no longer provide
                   query = URI::DEFAULT_PARSER.escape("\"#{term}\"")
                   result = service.search(config[:field], query, config[:library])
                   raise RuntimeError, "Multiple records found for #{config[:field]}=#{term}" if result.size > 1
                   result.empty? ? nil : ::Libis::Tools::Metadata::Marc21Record.new(result.first.root)

                 when ::Libis::Services::Scope::Search
                   service.query(term, type: config[:field])
                   service.next_record do |doc|
                     ::Libis::Tools::Metadata::DublinCoreRecord.new(doc.to_xml)
                   end

                 else
                   raise RuntimeError, "Service '#{service}' unknown"

                 end

        unless record
          raise RuntimeError, "No record found for #{config[:field]} = '#{term}'"
        end

        # the mapper module supplies the to_dc conversion for this record type
        record.extend mapper_class
        record.to_dc

      rescue StandardError => e
        raise RuntimeError, "Search request failed: #{e.message}"
      end

      # Saves +record+ as +filename+ in the target directory.
      #
      # @return [Boolean] false when there is no record, true otherwise
      def save(record, filename)
        return false unless record

        record.save File.join(@target_dir, filename)

        true
      end

      # Save options: formatted output without XML declaration.
      NO_DECL = Nokogiri::XML::Node::SaveOptions::FORMAT + Nokogiri::XML::Node::SaveOptions::NO_DECLARATION

      # Wraps the Dublin Core record in an updateMD document for the given PID.
      #
      # @param pid [String] value of the PID element
      # @param record [#document] record whose XML is embedded as CDATA
      # @return [Libis::Tools::XmlDocument]
      def md_update_xml(pid, record)
        Libis::Tools::XmlDocument.parse <<-EO_XML
          <updateMD xmlns="http://com/exlibris/digitool/repository/api/xmlbeans">
            <PID>#{pid}</PID>
            <metadata>
              <type>descriptive</type>
              <subType>dc</subType>
              <content>
                <![CDATA[#{record.document.to_xml(save_with: NO_DECL)}]]>
              </content>
            </metadata>
          </updateMD>
        EO_XML
      end

    end

  end
end