libis-tools 0.9.20 → 0.9.21
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +36 -233
- data/Rakefile +5 -0
- data/lib/libis/tools.rb +1 -0
- data/lib/libis/tools/assert.rb +11 -0
- data/lib/libis/tools/checksum.rb +22 -5
- data/lib/libis/tools/command.rb +24 -3
- data/lib/libis/tools/config.rb +61 -33
- data/lib/libis/tools/config_file.rb +0 -1
- data/lib/libis/tools/deep_struct.rb +10 -2
- data/lib/libis/tools/extend/empty.rb +2 -2
- data/lib/libis/tools/extend/hash.rb +37 -18
- data/lib/libis/tools/extend/kernel.rb +9 -0
- data/lib/libis/tools/extend/string.rb +17 -8
- data/lib/libis/tools/logger.rb +95 -44
- data/lib/libis/tools/metadata.rb +5 -1
- data/lib/libis/tools/metadata/dublin_core_record.rb +22 -4
- data/lib/libis/tools/metadata/field_format.rb +49 -9
- data/lib/libis/tools/metadata/fix_field.rb +5 -0
- data/lib/libis/tools/metadata/mapper.rb +2 -1
- data/lib/libis/tools/metadata/mappers/flandrica.rb +8 -1
- data/lib/libis/tools/metadata/mappers/kuleuven.rb +6 -2
- data/lib/libis/tools/metadata/marc21_record.rb +1 -0
- data/lib/libis/tools/metadata/marc_record.rb +31 -12
- data/lib/libis/tools/metadata/parser/basic_parser.rb +2 -0
- data/lib/libis/tools/metadata/parser/dublin_core_parser.rb +2 -1
- data/lib/libis/tools/metadata/parser/marc21_parser.rb +2 -1
- data/lib/libis/tools/metadata/parser/marc_format_parser.rb +2 -1
- data/lib/libis/tools/metadata/parser/marc_rules.rb +2 -1
- data/lib/libis/tools/metadata/parser/marc_select_parser.rb +2 -1
- data/lib/libis/tools/metadata/parser/patch.rb +1 -0
- data/lib/libis/tools/metadata/parser/subfield_criteria_parser.rb +2 -1
- data/lib/libis/tools/metadata/sharepoint_mapping.rb +1 -0
- data/lib/libis/tools/metadata/sharepoint_record.rb +2 -0
- data/lib/libis/tools/metadata/var_field.rb +8 -0
- data/lib/libis/tools/mets_dnx.rb +61 -0
- data/lib/libis/tools/mets_file.rb +87 -604
- data/lib/libis/tools/mets_objects.rb +534 -0
- data/lib/libis/tools/parameter.rb +144 -21
- data/lib/libis/tools/thread_safe.rb +31 -0
- data/lib/libis/tools/version.rb +1 -1
- data/lib/libis/tools/xml_document.rb +18 -24
- data/libis-tools.gemspec +6 -2
- data/spec/config_spec.rb +3 -4
- data/spec/logger_spec.rb +13 -30
- data/spec/mets_file_spec.rb +17 -17
- metadata +53 -7
@@ -6,10 +6,17 @@ module Libis
|
|
6
6
|
module Tools
|
7
7
|
module Metadata
|
8
8
|
|
9
|
+
# Convenience class to create and read DC records.
|
10
|
+
# Most of the functionality is derived from the {::Libis::Tools::XmlDocument} base class. This class puts its
|
11
|
+
# focus on supporting the <dc:xxx> and <dcterms:xxx> namespaces. For most tags the namespaces are added
|
12
|
+
# automatically by checking which tag you want to add. In some cases the same tag exists in both namespaces and
|
13
|
+
# you may want to state the namespace explicitly. Even then things are made as easy as possible.
|
9
14
|
class DublinCoreRecord < Libis::Tools::XmlDocument
|
10
15
|
|
16
|
+
# List of known tags in the DC namespace
|
11
17
|
DC_ELEMENTS = %w'contributor coverage creator date description format identifier language' +
|
12
18
|
%w'publisher relation rights source subject title type'
|
19
|
+
# List of known tags in the DCTERMS namespace
|
13
20
|
DCTERMS_ELEMENTS = %w'abstract accessRights accrualMethod accrualPeriodicity accrualPolicy alternative' +
|
14
21
|
%w'audience available bibliographicCitation conformsTo contributor coverage created creator date' +
|
15
22
|
%w'dateAccepted dateCopyrighted dateSubmitted description educationLevel extent format hasFormat' +
|
@@ -18,6 +25,11 @@ module Libis
|
|
18
25
|
%w'references relation replaces requires rights rightsHolder source spatial subject tableOfContents' +
|
19
26
|
%w'temporal title type valid'
|
20
27
|
|
28
|
+
# Create new DC document.
|
29
|
+
# If the doc parameter is nil a new empty DC document will be created with the dc:record root element and all
|
30
|
+
# required namespaces defined.
|
31
|
+
# @note The input document is not checked if it is a valid DC record XML.
|
32
|
+
# @param [::Libis::Tools::XmlDocument,String,IO,Hash] doc optional document to read.
|
21
33
|
def initialize(doc = nil)
|
22
34
|
super()
|
23
35
|
xml_doc = case doc
|
@@ -45,10 +57,9 @@ module Libis
|
|
45
57
|
raise ArgumentError, 'XML document not valid.' if self.invalid?
|
46
58
|
end
|
47
59
|
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
60
|
+
# Search the document with xpath.
|
61
|
+
# If no namespace is present, the 'dc:' namespace will be added.
|
62
|
+
# @param [String] path any valid XPath expression
|
52
63
|
def xpath(path)
|
53
64
|
m = /^([\/.]*\/)?(dc(terms)?:)?(.*)/.match(path.to_s)
|
54
65
|
return [] unless m[4]
|
@@ -56,6 +67,13 @@ module Libis
|
|
56
67
|
@document.xpath(path.to_s)
|
57
68
|
end
|
58
69
|
|
70
|
+
# Add a node.
|
71
|
+
# You can omit the namespace in the name parameter. The method will add the correct namespace for you. If using
|
72
|
+
# symbols for name, an underscore ('_') can be used as separator instead of the colon (':').
|
73
|
+
# @param [String,Symbol] name tag name of the element
|
74
|
+
# @param [String] value content of the new element
|
75
|
+
# @param [Nokogiri::XML::Node] parent the new element will be attached to this node
|
76
|
+
# @param [Hash] attributes list of <attribute_name>, <attribute_value> pairs for the new element
|
59
77
|
def add_node(name, value = nil, parent = nil, attributes = {})
|
60
78
|
ns, tag = get_namespace(name.to_s)
|
61
79
|
(attributes[:namespaces] ||= {})[:node_ns] ||= ns if ns
|
@@ -4,18 +4,56 @@ module Libis
|
|
4
4
|
module Tools
|
5
5
|
module Metadata
|
6
6
|
|
7
|
+
# Helper class for formatting field data.
|
8
|
+
#
|
9
|
+
# The FieldFormat class can omit prefix and/or postfix if no data is present and omits the join string if only
|
10
|
+
# one data element is present.
|
7
11
|
class FieldFormat
|
8
12
|
|
13
|
+
# [Array] the list that makes up the data
|
9
14
|
attr_accessor :parts
|
15
|
+
|
16
|
+
# [String] the text that will be placed in front of the generated text
|
10
17
|
attr_accessor :prefix
|
11
|
-
|
18
|
+
|
19
|
+
# [String] the text that will be placed at the end of the generated text
|
12
20
|
attr_accessor :postfix
|
13
21
|
|
22
|
+
# [String] the text used between the parts of the data
|
23
|
+
attr_accessor :join
|
24
|
+
|
25
|
+
# Create new formatter
|
26
|
+
#
|
27
|
+
# The method takes any number of arguments and processes them as data parts. If the last one is a Hash, it is
|
28
|
+
# interpreted as options hash. The data parts can either be given as an Array or set of arguments or within the
|
29
|
+
# options hash with key +:parts+.
|
30
|
+
#
|
31
|
+
# On each element in the data set the formatter will call the #to_s method to
|
32
|
+
# give each data object the opportunity to process its data.
|
33
|
+
#
|
34
|
+
# @param [Array, Hash] parts whatever makes up the data to be formatted.
|
14
35
|
def initialize(*parts)
|
15
36
|
@parts = []
|
16
37
|
self[*parts]
|
17
38
|
end
|
18
39
|
|
40
|
+
# Parses the arguments, stripping off an optional last Hash as options.
|
41
|
+
# @param (see #initialize)
|
42
|
+
def [](*parts)
|
43
|
+
options = parts.last.is_a?(Hash) ? parts.pop : {}
|
44
|
+
add parts
|
45
|
+
x = options.delete(:parts)
|
46
|
+
add x if x
|
47
|
+
add_options options
|
48
|
+
end
|
49
|
+
|
50
|
+
# Set options.
|
51
|
+
#
|
52
|
+
# Besides the three options +:prefix+, +:postfix+ and +:join+ it also accepts the option +:fix+. This combines
|
53
|
+
# both +:prefix+ and +:postfix+ options by specifying "<prefix>|<postfix>". If both prefix and postfix are only
|
54
|
+
# 1 character wide the format "<prefix><postfix>" is also allowed.
|
55
|
+
#
|
56
|
+
# @param [Hash] options the options list
|
19
57
|
def add_options(options = {})
|
20
58
|
if options[:fix]
|
21
59
|
if options[:fix].size == 2
|
@@ -30,6 +68,10 @@ module Libis
|
|
30
68
|
self
|
31
69
|
end
|
32
70
|
|
71
|
+
# Add default options.
|
72
|
+
# (see #add_options)
|
73
|
+
# None of these options will be set if they are already set. If you need to overwrite them, use {#add_options}.
|
74
|
+
# @param (see #add_options)
|
33
75
|
def add_default_options(options = {})
|
34
76
|
options.delete(:prefix) if @prefix
|
35
77
|
options.delete(:postfix) if @postfix
|
@@ -38,18 +80,14 @@ module Libis
|
|
38
80
|
add_options options
|
39
81
|
end
|
40
82
|
|
41
|
-
|
42
|
-
options = parts.last.is_a?(Hash) ? parts.pop : {}
|
43
|
-
add parts
|
44
|
-
x = options.delete(:parts)
|
45
|
-
add x if x
|
46
|
-
add_options options
|
47
|
-
end
|
48
|
-
|
83
|
+
# Shortcut class method for initializer
|
49
84
|
def self.from(*h)
|
50
85
|
self.new(*h)
|
51
86
|
end
|
52
87
|
|
88
|
+
# The real formatter method.
|
89
|
+
# This method parses the data and applies the options to generate the formatted string.
|
90
|
+
# @return [String] the formatted string
|
53
91
|
def to_s
|
54
92
|
@parts.delete_if { |x|
|
55
93
|
x.nil? or
|
@@ -63,6 +101,8 @@ module Libis
|
|
63
101
|
result
|
64
102
|
end
|
65
103
|
|
104
|
+
protected
|
105
|
+
|
66
106
|
def add(part)
|
67
107
|
case part
|
68
108
|
when Hash
|
@@ -4,16 +4,21 @@ module Libis
|
|
4
4
|
module Tools
|
5
5
|
module Metadata
|
6
6
|
|
7
|
+
# Helper class for implementing a fixed field for MARC
|
7
8
|
class FixField
|
8
9
|
|
9
10
|
attr_reader :tag
|
10
11
|
attr_accessor :datas
|
11
12
|
|
13
|
+
# Create new fixed field
|
14
|
+
# @param [String] tag tag
|
15
|
+
# @param [String] datas field data
|
12
16
|
def initialize(tag, datas)
|
13
17
|
@tag = tag
|
14
18
|
@datas = datas || ''
|
15
19
|
end
|
16
20
|
|
21
|
+
|
17
22
|
def [](from = nil, to = nil)
|
18
23
|
return @datas unless from
|
19
24
|
to ? @datas[from..to] : @datas[from]
|
@@ -8,8 +8,9 @@ require_relative 'parsers'
|
|
8
8
|
module Libis
|
9
9
|
module Tools
|
10
10
|
module Metadata
|
11
|
-
|
12
11
|
# noinspection RubyResolve
|
12
|
+
|
13
|
+
# New style parsers and converters for metadata. New, not finished and untested.
|
13
14
|
class Mapper
|
14
15
|
|
15
16
|
attr_reader :target_parser, :selection_parser, :format_parser
|
@@ -8,8 +8,15 @@ module Libis
|
|
8
8
|
module Mappers
|
9
9
|
|
10
10
|
# noinspection RubyResolve
|
11
|
+
|
12
|
+
# Mixin for {::Libis::Tools::Metadata::MarcRecord} to enable conversion into
|
13
|
+
# {Libis::Tools::Metadata::DublinCoreRecord}. This module implements the conversion mapping for Flandrica by
|
14
|
+
# extending the version for {::Libis::Tools::Metadata::Mappers::Kuleuven KU Leuven} and overwriting what's
|
15
|
+
# different. This means any change to the KU Leuven mapping may have an effect on this mapping as well.
|
11
16
|
module Flandrica
|
12
|
-
|
17
|
+
extend Libis::Tools::Metadata::Mappers::Kuleuven
|
18
|
+
|
19
|
+
protected
|
13
20
|
|
14
21
|
def marc2dc_identifier(xml)
|
15
22
|
Libis::Tools::Metadata::Mappers::Kuleuven.marc2dc_identifier(xml)
|
@@ -8,10 +8,15 @@ module Libis
|
|
8
8
|
module Tools
|
9
9
|
module Metadata
|
10
10
|
module Mappers
|
11
|
-
|
12
11
|
# noinspection RubyResolve
|
12
|
+
|
13
|
+
# Mixin for {::Libis::Tools::Metadata::MarcRecord} to enable conversion into
|
14
|
+
# {Libis::Tools::Metadata::DublinCoreRecord}. This module implements the conversion mapping for KU Leuven.
|
13
15
|
module Kuleuven
|
14
16
|
|
17
|
+
# Main conversion method.
|
18
|
+
# @param [String] label optional extra identifier to add to the DC record.
|
19
|
+
# @return [::Libis::Tools::Metadata::DublinCoreRecord]
|
15
20
|
def to_dc(label = nil)
|
16
21
|
assert(self.is_a? Libis::Tools::Metadata::MarcRecord)
|
17
22
|
|
@@ -1400,7 +1405,6 @@ module Libis
|
|
1400
1405
|
if DOLLAR4TABLE[data.tag].has_key? code
|
1401
1406
|
return DOLLAR4TABLE[data.tag][code][1]
|
1402
1407
|
end
|
1403
|
-
Application.logger.warn(self.class) { "Did not find $4 value in lookuptable: #{data.dump_line}" }
|
1404
1408
|
:contributor
|
1405
1409
|
end
|
1406
1410
|
|
@@ -85,11 +85,19 @@ module Libis
|
|
85
85
|
|
86
86
|
alias_method :each_tag, :all_tags
|
87
87
|
|
88
|
+
# Get all fields matching search criteria.
|
89
|
+
# As {#all_tags} but without subfield criteria.
|
90
|
+
# @param [String] tag Tag selection string. Tag name with indicators, '#' for wildcard, '_' for blank. If an
|
91
|
+
# extra subfield name is added, a result will be created for each instance found of that subfield.
|
92
|
+
# @param [Proc] select_block block that will be executed once for each field found. The block takes one argument
|
93
|
+
# (the field) and should return true or false. True selects the field, false rejects it.
|
94
|
+
# @return [Array] If a block was supplied to the method call, the array will contain the result of the block
|
95
|
+
# for each tag found. Otherwise the array will just contain the data for each matching tag.
|
88
96
|
def select_fields(tag, select_block = nil, &block)
|
89
97
|
all_tags(tag, nil, select_block, &block)
|
90
98
|
end
|
91
99
|
|
92
|
-
# Find the first
|
100
|
+
# Find the first tag matching the criteria.
|
93
101
|
#
|
94
102
|
# If a block is supplied, it will be called with the found field data. The return value will be whatever the
|
95
103
|
# block returns. If no block is supplied, the field data will be returned. If nothing was found, the return
|
@@ -105,34 +113,43 @@ module Libis
|
|
105
113
|
yield result
|
106
114
|
end
|
107
115
|
|
116
|
+
# Find all fields matching the criteria.
|
117
|
+
# (see #first_tag)
|
118
|
+
# @param (see #first_tag)
|
108
119
|
def all_fields(tag, subfields)
|
109
|
-
r = all_tags(tag, subfields).collect { |
|
120
|
+
r = all_tags(tag, subfields).collect { |t| t.subfields_array(subfields) }.flatten.compact
|
110
121
|
return r unless block_given?
|
111
122
|
r.map { |field| yield field }
|
112
123
|
r.size > 0
|
113
124
|
end
|
114
125
|
|
115
|
-
|
116
|
-
|
126
|
+
# Find the first field matching the criteria
|
127
|
+
# (see #all_fields)
|
128
|
+
# @param (see #all_fields)
|
129
|
+
def first_field(tag, subfields)
|
130
|
+
result = all_fields(tag, subfields).first
|
117
131
|
return result unless block_given?
|
118
132
|
return false unless result
|
119
133
|
yield result
|
120
134
|
true
|
121
135
|
end
|
122
136
|
|
123
|
-
|
124
|
-
|
125
|
-
|
137
|
+
# Perform action on each field found. Code block required.
|
138
|
+
# @param (see #all_fields)
|
139
|
+
def each_field(tag, subfields)
|
140
|
+
all_fields(tag, subfields).each do |field|
|
126
141
|
yield field
|
127
142
|
end
|
128
143
|
end
|
129
144
|
|
145
|
+
# Dump content to string.
|
130
146
|
def marc_dump
|
131
147
|
all.values.flatten.each_with_object([]) { |record, m| m << record.dump }.join
|
132
148
|
end
|
133
149
|
|
150
|
+
# Save the current MARC record to file.
|
151
|
+
# @param [String] filename name of the file
|
134
152
|
def save(filename)
|
135
|
-
|
136
153
|
doc = ::Libis::Tools::XmlDocument.new
|
137
154
|
doc.root = @node
|
138
155
|
|
@@ -142,23 +159,25 @@ module Libis
|
|
142
159
|
::Nokogiri::XML::Node::SaveOptions::AS_XML |
|
143
160
|
::Nokogiri::XML::Node::SaveOptions::FORMAT
|
144
161
|
)
|
145
|
-
|
146
162
|
end
|
147
163
|
|
164
|
+
# Load XML document from file and create a new {MarcRecord} for it.
|
165
|
+
# @param [String] filename name of XML Marc file
|
148
166
|
def self.load(filename)
|
149
|
-
|
150
167
|
doc = ::Libis::Tools::XmlDocument.open(filename)
|
151
168
|
self.new(doc.root)
|
152
|
-
|
153
169
|
end
|
154
170
|
|
171
|
+
# Load XML document from stream and create a new {MarcRecord} for it.
|
172
|
+
# @param [IO,String] io input stream
|
155
173
|
def self.read(io)
|
156
174
|
io = StringIO.new(io) if io.is_a? String
|
157
175
|
doc = ::Libis::Tools::XmlDocument.parse(io)
|
158
176
|
self.new(doc.root)
|
159
|
-
|
160
177
|
end
|
161
178
|
|
179
|
+
# Dump Marc record in Aleph Sequential format
|
180
|
+
# @return [String] Aleph sequential output
|
162
181
|
def to_aseq
|
163
182
|
record = ''
|
164
183
|
doc_number = tag('001').datas
|
@@ -7,8 +7,9 @@ require_relative 'basic_parser'
|
|
7
7
|
module Libis
|
8
8
|
module Tools
|
9
9
|
module Metadata
|
10
|
-
|
11
10
|
# noinspection RubyResolve
|
11
|
+
|
12
|
+
# New style parsers and converters for metadata. New, not finished and untested.
|
12
13
|
class DublinCoreParser < Libis::Tools::Metadata::BasicParser
|
13
14
|
rule(:namespace) { match('[^:]').repeat(1).as(:namespace) >> str(':') }
|
14
15
|
rule(:namespace?) { namespace.maybe }
|
@@ -8,8 +8,9 @@ require_relative 'marc_rules'
|
|
8
8
|
module Libis
|
9
9
|
module Tools
|
10
10
|
module Metadata
|
11
|
-
|
12
11
|
# noinspection RubyResolve
|
12
|
+
|
13
|
+
# New style parsers and converters for metadata. New, not finished and untested.
|
13
14
|
class Marc21Parser < Libis::Tools::Metadata::BasicParser
|
14
15
|
|
15
16
|
root(:marc21)
|
@@ -8,8 +8,9 @@ require_relative 'marc_rules'
|
|
8
8
|
module Libis
|
9
9
|
module Tools
|
10
10
|
module Metadata
|
11
|
-
|
12
11
|
# noinspection RubyResolve
|
12
|
+
|
13
|
+
# New style parsers and converters for metadata. New, not finished and untested.
|
13
14
|
class MarcFormatParser < Libis::Tools::Metadata::BasicParser
|
14
15
|
include Libis::Tools::Metadata::MarcRules
|
15
16
|
|
@@ -8,8 +8,9 @@ require_relative 'marc_rules'
|
|
8
8
|
module Libis
|
9
9
|
module Tools
|
10
10
|
module Metadata
|
11
|
-
|
12
11
|
# noinspection RubyResolve
|
12
|
+
|
13
|
+
# New style parsers and converters for metadata. New, not finished and untested.
|
13
14
|
class MarcSelectParser < Libis::Tools::Metadata::BasicParser
|
14
15
|
include Libis::Tools::Metadata::MarcRules
|
15
16
|
root(:MARC)
|
@@ -7,8 +7,9 @@ require_relative 'basic_parser'
|
|
7
7
|
module Libis
|
8
8
|
module Tools
|
9
9
|
module Metadata
|
10
|
-
|
11
10
|
# noinspection RubyResolve
|
11
|
+
|
12
|
+
# New style parsers and converters for metadata. New, not finished and untested.
|
12
13
|
class SubfieldCriteriaParser < Libis::Tools::Metadata::BasicParser
|
13
14
|
|
14
15
|
root(:criteria)
|