libis-tools 0.9.20 → 0.9.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +36 -233
- data/Rakefile +5 -0
- data/lib/libis/tools.rb +1 -0
- data/lib/libis/tools/assert.rb +11 -0
- data/lib/libis/tools/checksum.rb +22 -5
- data/lib/libis/tools/command.rb +24 -3
- data/lib/libis/tools/config.rb +61 -33
- data/lib/libis/tools/config_file.rb +0 -1
- data/lib/libis/tools/deep_struct.rb +10 -2
- data/lib/libis/tools/extend/empty.rb +2 -2
- data/lib/libis/tools/extend/hash.rb +37 -18
- data/lib/libis/tools/extend/kernel.rb +9 -0
- data/lib/libis/tools/extend/string.rb +17 -8
- data/lib/libis/tools/logger.rb +95 -44
- data/lib/libis/tools/metadata.rb +5 -1
- data/lib/libis/tools/metadata/dublin_core_record.rb +22 -4
- data/lib/libis/tools/metadata/field_format.rb +49 -9
- data/lib/libis/tools/metadata/fix_field.rb +5 -0
- data/lib/libis/tools/metadata/mapper.rb +2 -1
- data/lib/libis/tools/metadata/mappers/flandrica.rb +8 -1
- data/lib/libis/tools/metadata/mappers/kuleuven.rb +6 -2
- data/lib/libis/tools/metadata/marc21_record.rb +1 -0
- data/lib/libis/tools/metadata/marc_record.rb +31 -12
- data/lib/libis/tools/metadata/parser/basic_parser.rb +2 -0
- data/lib/libis/tools/metadata/parser/dublin_core_parser.rb +2 -1
- data/lib/libis/tools/metadata/parser/marc21_parser.rb +2 -1
- data/lib/libis/tools/metadata/parser/marc_format_parser.rb +2 -1
- data/lib/libis/tools/metadata/parser/marc_rules.rb +2 -1
- data/lib/libis/tools/metadata/parser/marc_select_parser.rb +2 -1
- data/lib/libis/tools/metadata/parser/patch.rb +1 -0
- data/lib/libis/tools/metadata/parser/subfield_criteria_parser.rb +2 -1
- data/lib/libis/tools/metadata/sharepoint_mapping.rb +1 -0
- data/lib/libis/tools/metadata/sharepoint_record.rb +2 -0
- data/lib/libis/tools/metadata/var_field.rb +8 -0
- data/lib/libis/tools/mets_dnx.rb +61 -0
- data/lib/libis/tools/mets_file.rb +87 -604
- data/lib/libis/tools/mets_objects.rb +534 -0
- data/lib/libis/tools/parameter.rb +144 -21
- data/lib/libis/tools/thread_safe.rb +31 -0
- data/lib/libis/tools/version.rb +1 -1
- data/lib/libis/tools/xml_document.rb +18 -24
- data/libis-tools.gemspec +6 -2
- data/spec/config_spec.rb +3 -4
- data/spec/logger_spec.rb +13 -30
- data/spec/mets_file_spec.rb +17 -17
- metadata +53 -7
@@ -6,10 +6,17 @@ module Libis
|
|
6
6
|
module Tools
|
7
7
|
module Metadata
|
8
8
|
|
9
|
+
# Convenience class to create and read DC records.
|
10
|
+
# Most of the functionality is derived from the {::Libis::Tools::XmlDocument} base class. This class puts its
|
11
|
+
# focus on supporting the <dc:xxx> and <dcterms:xxx> namespaces. For most tags the namespaces are added
|
12
|
+
# automatically by checking which tag you want to add. In some cases the same tag exists in both namespaces and
|
13
|
+
# you may want to state the namespace explicitly. Even then things are made as easy as possible.
|
9
14
|
class DublinCoreRecord < Libis::Tools::XmlDocument
|
10
15
|
|
16
|
+
# List of known tags in the DC namespace
|
11
17
|
DC_ELEMENTS = %w'contributor coverage creator date description format identifier language' +
|
12
18
|
%w'publisher relation rights source subject title type'
|
19
|
+
# List of known tags in the DCTERMS namespace
|
13
20
|
DCTERMS_ELEMENTS = %w'abstract accessRights accrualMethod accrualPeriodicity accrualPolicy alternative' +
|
14
21
|
%w'audience available bibliographicCitation conformsTo contributor coverage created creator date' +
|
15
22
|
%w'dateAccepted dateCopyrighted dateSubmitted description educationLevel extent format hasFormat' +
|
@@ -18,6 +25,11 @@ module Libis
|
|
18
25
|
%w'references relation replaces requires rights rightsHolder source spatial subject tableOfContents' +
|
19
26
|
%w'temporal title type valid'
|
20
27
|
|
28
|
+
# Create new DC document.
|
29
|
+
# If the doc parameter is nil a new empty DC document will be created with the dc:record root element and all
|
30
|
+
# required namespaces defined.
|
31
|
+
# @note The input document is not checked if it is a valid DC record XML.
|
32
|
+
# @param [::Libis::Tools::XmlDocument,String,IO,Hash] doc optional document to read.
|
21
33
|
def initialize(doc = nil)
|
22
34
|
super()
|
23
35
|
xml_doc = case doc
|
@@ -45,10 +57,9 @@ module Libis
|
|
45
57
|
raise ArgumentError, 'XML document not valid.' if self.invalid?
|
46
58
|
end
|
47
59
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
60
|
+
# Search the document with xpath.
|
61
|
+
# If no namespace is present, the 'dc:' namespace will be added.
|
62
|
+
# @param [String] path any valid XPath expression
|
52
63
|
def xpath(path)
|
53
64
|
m = /^([\/.]*\/)?(dc(terms)?:)?(.*)/.match(path.to_s)
|
54
65
|
return [] unless m[4]
|
@@ -56,6 +67,13 @@ module Libis
|
|
56
67
|
@document.xpath(path.to_s)
|
57
68
|
end
|
58
69
|
|
70
|
+
# Add a node.
|
71
|
+
# You can omit the namespace in the name parameter. The method will add the correct namespace for you. If using
|
72
|
+
# symbols for name, an underscore ('_') can be used as separator instead of the colon (':').
|
73
|
+
# @param [String,Symbol] name tag name of the element
|
74
|
+
# @param [String] value content of the new element
|
75
|
+
# @param [Nokogiri::XML::Node] parent the new element will be attached to this node
|
76
|
+
# @param [Hash] attributes list of <attribute_name>, <attribute_value> pairs for the new element
|
59
77
|
def add_node(name, value = nil, parent = nil, attributes = {})
|
60
78
|
ns, tag = get_namespace(name.to_s)
|
61
79
|
(attributes[:namespaces] ||= {})[:node_ns] ||= ns if ns
|
@@ -4,18 +4,56 @@ module Libis
|
|
4
4
|
module Tools
|
5
5
|
module Metadata
|
6
6
|
|
7
|
+
# Helper class for formatting field data.
|
8
|
+
#
|
9
|
+
# The FieldFormat class can omit prefix and/or postfix if no data is present and omits the join string if only
|
10
|
+
# one data element is present.
|
7
11
|
class FieldFormat
|
8
12
|
|
13
|
+
# [Array] the list that makes up the data
|
9
14
|
attr_accessor :parts
|
15
|
+
|
16
|
+
# [String] the text that will be placed in front of the generated text
|
10
17
|
attr_accessor :prefix
|
11
|
-
|
18
|
+
|
19
|
+
# [String] the text that will be placed at the end of the generated text
|
12
20
|
attr_accessor :postfix
|
13
21
|
|
22
|
+
# [String] the text used between the parts of the data
|
23
|
+
attr_accessor :join
|
24
|
+
|
25
|
+
# Create new formatter
|
26
|
+
#
|
27
|
+
# The method takes any number of arguments and processes them as data parts. If the last one is a Hash, it is
|
28
|
+
# interpreted as options hash. The data parts can either be given as an Array or set of arguments or within the
|
29
|
+
# options hash with key +:parts+.
|
30
|
+
#
|
31
|
+
# On each element in the data set the formatter will call the #to_s method to
|
32
|
+
# give each data object the opportunity to process its data.
|
33
|
+
#
|
34
|
+
# @param [Array, Hash] parts whatever makes up the data to be formatted.
|
14
35
|
def initialize(*parts)
|
15
36
|
@parts = []
|
16
37
|
self[*parts]
|
17
38
|
end
|
18
39
|
|
40
|
+
# Parses the arguments, stripping off an optional last Hash as options.
|
41
|
+
# @param (see #initialize)
|
42
|
+
def [](*parts)
|
43
|
+
options = parts.last.is_a?(Hash) ? parts.pop : {}
|
44
|
+
add parts
|
45
|
+
x = options.delete(:parts)
|
46
|
+
add x if x
|
47
|
+
add_options options
|
48
|
+
end
|
49
|
+
|
50
|
+
# Set options.
|
51
|
+
#
|
52
|
+
# Besides the three options +:prefix+, +:postfix+ and +:join+ it also accepts the option +:fix+. This combines
|
53
|
+
# both +:prefix+ and +:postfix+ options by specifying "<prefix>|<postfix>". If both prefix and postfix are only
|
54
|
+
# 1 character wide the format "<prefix><postfix>" is also allowed.
|
55
|
+
#
|
56
|
+
# @param [Hash] options the options list
|
19
57
|
def add_options(options = {})
|
20
58
|
if options[:fix]
|
21
59
|
if options[:fix].size == 2
|
@@ -30,6 +68,10 @@ module Libis
|
|
30
68
|
self
|
31
69
|
end
|
32
70
|
|
71
|
+
# Add default options.
|
72
|
+
# (see #add_options)
|
73
|
+
# None of these options will be set if they are already set. If you need to overwrite them, use {#add_options}.
|
74
|
+
# @param (see #add_options)
|
33
75
|
def add_default_options(options = {})
|
34
76
|
options.delete(:prefix) if @prefix
|
35
77
|
options.delete(:postfix) if @postfix
|
@@ -38,18 +80,14 @@ module Libis
|
|
38
80
|
add_options options
|
39
81
|
end
|
40
82
|
|
41
|
-
|
42
|
-
options = parts.last.is_a?(Hash) ? parts.pop : {}
|
43
|
-
add parts
|
44
|
-
x = options.delete(:parts)
|
45
|
-
add x if x
|
46
|
-
add_options options
|
47
|
-
end
|
48
|
-
|
83
|
+
# Shortcut class method for initializer
|
49
84
|
def self.from(*h)
|
50
85
|
self.new(*h)
|
51
86
|
end
|
52
87
|
|
88
|
+
# The real formatter method.
|
89
|
+
# This method parses the data and applies the options to generate the formatted string.
|
90
|
+
# @return [String] the formatted string
|
53
91
|
def to_s
|
54
92
|
@parts.delete_if { |x|
|
55
93
|
x.nil? or
|
@@ -63,6 +101,8 @@ module Libis
|
|
63
101
|
result
|
64
102
|
end
|
65
103
|
|
104
|
+
protected
|
105
|
+
|
66
106
|
def add(part)
|
67
107
|
case part
|
68
108
|
when Hash
|
@@ -4,16 +4,21 @@ module Libis
|
|
4
4
|
module Tools
|
5
5
|
module Metadata
|
6
6
|
|
7
|
+
# Helper class for implementing a fixed field for MARC
|
7
8
|
class FixField
|
8
9
|
|
9
10
|
attr_reader :tag
|
10
11
|
attr_accessor :datas
|
11
12
|
|
13
|
+
# Create new fixed field
|
14
|
+
# @param [String] tag tag
|
15
|
+
# @param [String] datas field data
|
12
16
|
def initialize(tag, datas)
|
13
17
|
@tag = tag
|
14
18
|
@datas = datas || ''
|
15
19
|
end
|
16
20
|
|
21
|
+
|
17
22
|
def [](from = nil, to = nil)
|
18
23
|
return @datas unless from
|
19
24
|
to ? @datas[from..to] : @datas[from]
|
@@ -8,8 +8,9 @@ require_relative 'parsers'
|
|
8
8
|
module Libis
|
9
9
|
module Tools
|
10
10
|
module Metadata
|
11
|
-
|
12
11
|
# noinspection RubyResolve
|
12
|
+
|
13
|
+
# New style parsers and converters for metadata. New, not finished and untested.
|
13
14
|
class Mapper
|
14
15
|
|
15
16
|
attr_reader :target_parser, :selection_parser, :format_parser
|
@@ -8,8 +8,15 @@ module Libis
|
|
8
8
|
module Mappers
|
9
9
|
|
10
10
|
# noinspection RubyResolve
|
11
|
+
|
12
|
+
# Mixin for {::Libis::Tools::Metadata::MarcRecord} to enable conversion into
|
13
|
+
# {Libis::Tools::Metadata::DublinCoreRecord}. This module implements the conversion mapping for Flandrica by
|
14
|
+
# extending the version for {::Libis::Tools::Metadata::Mappers::Kuleuven KU Leuven} and overwriting what's
|
15
|
+
# different. This means any change to the KU Leuven mapping may have effect on this mapping as well.
|
11
16
|
module Flandrica
|
12
|
-
|
17
|
+
extend Libis::Tools::Metadata::Mappers::Kuleuven
|
18
|
+
|
19
|
+
protected
|
13
20
|
|
14
21
|
def marc2dc_identifier(xml)
|
15
22
|
Libis::Tools::Metadata::Mappers::Kuleuven.marc2dc_identifier(xml)
|
@@ -8,10 +8,15 @@ module Libis
|
|
8
8
|
module Tools
|
9
9
|
module Metadata
|
10
10
|
module Mappers
|
11
|
-
|
12
11
|
# noinspection RubyResolve
|
12
|
+
|
13
|
+
# Mixin for {::Libis::Tools::Metadata::MarcRecord} to enable conversion into
|
14
|
+
# {Libis::Tools::Metadata::DublinCoreRecord}. This module implements the conversion mapping for KU Leuven.
|
13
15
|
module Kuleuven
|
14
16
|
|
17
|
+
# Main conversion method.
|
18
|
+
# @param [String] label optional extra identifier to add to the DC record.
|
19
|
+
# @return [::Libis::Tools::Metadata::DublinCoreRecord]
|
15
20
|
def to_dc(label = nil)
|
16
21
|
assert(self.is_a? Libis::Tools::Metadata::MarcRecord)
|
17
22
|
|
@@ -1400,7 +1405,6 @@ module Libis
|
|
1400
1405
|
if DOLLAR4TABLE[data.tag].has_key? code
|
1401
1406
|
return DOLLAR4TABLE[data.tag][code][1]
|
1402
1407
|
end
|
1403
|
-
Application.logger.warn(self.class) { "Did not find $4 value in lookuptable: #{data.dump_line}" }
|
1404
1408
|
:contributor
|
1405
1409
|
end
|
1406
1410
|
|
@@ -85,11 +85,19 @@ module Libis
|
|
85
85
|
|
86
86
|
alias_method :each_tag, :all_tags
|
87
87
|
|
88
|
+
# Get all fields matching search criteria.
|
89
|
+
# As {#all_tags} but without subfield criteria.
|
90
|
+
# @param [String] tag Tag selection string. Tag name with indicators, '#' for wildcard, '_' for blank. If an
|
91
|
+
# extra subfield name is added, a result will be created for each instance found of that subfield.
|
92
|
+
# @param [Proc] select_block block that will be executed once for each field found. The block takes one argument
|
93
|
+
# (the field) and should return true or false. True selects the field, false rejects it.
|
94
|
+
# @return [Array] If a block was supplied to the method call, the array will contain the result of the block
|
95
|
+
# for each tag found. Otherwise the array will just contain the data for each matching tag.
|
88
96
|
def select_fields(tag, select_block = nil, &block)
|
89
97
|
all_tags(tag, nil, select_block, &block)
|
90
98
|
end
|
91
99
|
|
92
|
-
# Find the first
|
100
|
+
# Find the first tag matching the criteria.
|
93
101
|
#
|
94
102
|
# If a block is supplied, it will be called with the found field data. The return value will be whatever the
|
95
103
|
# block returns. If no block is supplied, the field data will be returned. If nothing was found, the return
|
@@ -105,34 +113,43 @@ module Libis
|
|
105
113
|
yield result
|
106
114
|
end
|
107
115
|
|
116
|
+
# Find all fields matching the criteria.
|
117
|
+
# (see #first_tag)
|
118
|
+
# @param (see #first_tag)
|
108
119
|
def all_fields(tag, subfields)
|
109
|
-
r = all_tags(tag, subfields).collect { |
|
120
|
+
r = all_tags(tag, subfields).collect { |t| t.subfields_array(subfields) }.flatten.compact
|
110
121
|
return r unless block_given?
|
111
122
|
r.map { |field| yield field }
|
112
123
|
r.size > 0
|
113
124
|
end
|
114
125
|
|
115
|
-
|
116
|
-
|
126
|
+
# Find the first field matching the criteria
|
127
|
+
# (see #all_fields)
|
128
|
+
# @param (see #all_fields)
|
129
|
+
def first_field(tag, subfields)
|
130
|
+
result = all_fields(tag, subfields).first
|
117
131
|
return result unless block_given?
|
118
132
|
return false unless result
|
119
133
|
yield result
|
120
134
|
true
|
121
135
|
end
|
122
136
|
|
123
|
-
|
124
|
-
|
125
|
-
|
137
|
+
# Perform action on each field found. Code block required.
|
138
|
+
# @param (see #all_fields)
|
139
|
+
def each_field(tag, subfields)
|
140
|
+
all_fields(tag, subfields).each do |field|
|
126
141
|
yield field
|
127
142
|
end
|
128
143
|
end
|
129
144
|
|
145
|
+
# Dump content to string.
|
130
146
|
def marc_dump
|
131
147
|
all.values.flatten.each_with_object([]) { |record, m| m << record.dump }.join
|
132
148
|
end
|
133
149
|
|
150
|
+
# Save the current MARC record to file.
|
151
|
+
# @param [String] filename name of the file
|
134
152
|
def save(filename)
|
135
|
-
|
136
153
|
doc = ::Libis::Tools::XmlDocument.new
|
137
154
|
doc.root = @node
|
138
155
|
|
@@ -142,23 +159,25 @@ module Libis
|
|
142
159
|
::Nokogiri::XML::Node::SaveOptions::AS_XML |
|
143
160
|
::Nokogiri::XML::Node::SaveOptions::FORMAT
|
144
161
|
)
|
145
|
-
|
146
162
|
end
|
147
163
|
|
164
|
+
# Load XML document from file and create a new {MarcRecord} for it.
|
165
|
+
# @param [String] filename name of XML Marc file
|
148
166
|
def self.load(filename)
|
149
|
-
|
150
167
|
doc = ::Libis::Tools::XmlDocument.open(filename)
|
151
168
|
self.new(doc.root)
|
152
|
-
|
153
169
|
end
|
154
170
|
|
171
|
+
# Load XML document from stream and create a new {MarcRecord} for it.
|
172
|
+
# @param [IO,String] io input stream
|
155
173
|
def self.read(io)
|
156
174
|
io = StringIO.new(io) if io.is_a? String
|
157
175
|
doc = ::Libis::Tools::XmlDocument.parse(io)
|
158
176
|
self.new(doc.root)
|
159
|
-
|
160
177
|
end
|
161
178
|
|
179
|
+
# Dump Marc record in Aleph Sequential format
|
180
|
+
# @return [String] Aleph sequential output
|
162
181
|
def to_aseq
|
163
182
|
record = ''
|
164
183
|
doc_number = tag('001').datas
|
@@ -7,8 +7,9 @@ require_relative 'basic_parser'
|
|
7
7
|
module Libis
|
8
8
|
module Tools
|
9
9
|
module Metadata
|
10
|
-
|
11
10
|
# noinspection RubyResolve
|
11
|
+
|
12
|
+
# New style parsers and converters for metadata. New, not finished and untested.
|
12
13
|
class DublinCoreParser < Libis::Tools::Metadata::BasicParser
|
13
14
|
rule(:namespace) { match('[^:]').repeat(1).as(:namespace) >> str(':') }
|
14
15
|
rule(:namespace?) { namespace.maybe }
|
@@ -8,8 +8,9 @@ require_relative 'marc_rules'
|
|
8
8
|
module Libis
|
9
9
|
module Tools
|
10
10
|
module Metadata
|
11
|
-
|
12
11
|
# noinspection RubyResolve
|
12
|
+
|
13
|
+
# New style parsers and converters for metadata. New, not finished and untested.
|
13
14
|
class Marc21Parser < Libis::Tools::Metadata::BasicParser
|
14
15
|
|
15
16
|
root(:marc21)
|
@@ -8,8 +8,9 @@ require_relative 'marc_rules'
|
|
8
8
|
module Libis
|
9
9
|
module Tools
|
10
10
|
module Metadata
|
11
|
-
|
12
11
|
# noinspection RubyResolve
|
12
|
+
|
13
|
+
# New style parsers and converters for metadata. New, not finished and untested.
|
13
14
|
class MarcFormatParser < Libis::Tools::Metadata::BasicParser
|
14
15
|
include Libis::Tools::Metadata::MarcRules
|
15
16
|
|
@@ -8,8 +8,9 @@ require_relative 'marc_rules'
|
|
8
8
|
module Libis
|
9
9
|
module Tools
|
10
10
|
module Metadata
|
11
|
-
|
12
11
|
# noinspection RubyResolve
|
12
|
+
|
13
|
+
# New style parsers and converters for metadata. New, not finished and untested.
|
13
14
|
class MarcSelectParser < Libis::Tools::Metadata::BasicParser
|
14
15
|
include Libis::Tools::Metadata::MarcRules
|
15
16
|
root(:MARC)
|
@@ -7,8 +7,9 @@ require_relative 'basic_parser'
|
|
7
7
|
module Libis
|
8
8
|
module Tools
|
9
9
|
module Metadata
|
10
|
-
|
11
10
|
# noinspection RubyResolve
|
11
|
+
|
12
|
+
# New style parsers and converters for metadata. New, not finished and untested.
|
12
13
|
class SubfieldCriteriaParser < Libis::Tools::Metadata::BasicParser
|
13
14
|
|
14
15
|
root(:criteria)
|