libis-metadata 0.2.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +51 -12
- data/lib/libis/metadata/cli/cli_downloader.rb +0 -1
- data/lib/libis/metadata/downloader.rb +10 -9
- data/lib/libis/metadata/mappers/flandrica.rb +54 -56
- data/lib/libis/metadata/mappers/kuleuven.rb +1724 -1726
- data/lib/libis/metadata/mappers/scope.rb +25 -27
- data/lib/libis/metadata/var_field.rb +211 -213
- data/lib/libis/metadata/version.rb +1 -1
- data/metadata.gemspec +2 -2
- metadata +6 -6
@@ -1,46 +1,44 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
require 'libis/
|
3
|
+
require 'libis/metadata/dublin_core_record'
|
4
4
|
require 'libis/tools/assert'
|
5
5
|
|
6
6
|
module Libis
|
7
|
-
module
|
8
|
-
module
|
9
|
-
|
10
|
-
# noinspection RubyResolve
|
7
|
+
module Metadata
|
8
|
+
module Mappers
|
9
|
+
# noinspection RubyResolve
|
11
10
|
|
12
|
-
|
13
|
-
|
11
|
+
# Mixin for {::Libis::Metadata::DublinCoreRecord} to enable conversion of the Scope exported DC record.
|
12
|
+
module Scope
|
14
13
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
14
|
+
# Main conversion method.
|
15
|
+
# @return [::Libis::Metadata::DublinCoreRecord]
|
16
|
+
def to_dc
|
17
|
+
assert(self.is_a? Libis::Metadata::DublinCoreRecord)
|
19
18
|
|
20
|
-
|
19
|
+
doc = Libis::Metadata::DublinCoreRecord.new(self.to_xml)
|
21
20
|
|
22
|
-
|
21
|
+
if doc.isPartOf
|
23
22
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
23
|
+
# create new node for isReferencedBy
|
24
|
+
new_node = doc.add_node(
|
25
|
+
'isReferencedBy',
|
26
|
+
doc.isPartOf.content,
|
27
|
+
nil,
|
28
|
+
'xsi:type' => 'dcterms:URI'
|
29
|
+
)
|
31
30
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
end
|
36
|
-
|
37
|
-
doc
|
31
|
+
# Replace isPartOf with isReferencedBy
|
32
|
+
doc.isPartOf.replace new_node
|
38
33
|
|
39
34
|
end
|
40
35
|
|
36
|
+
doc
|
37
|
+
|
41
38
|
end
|
42
39
|
|
43
40
|
end
|
41
|
+
|
44
42
|
end
|
45
43
|
end
|
46
|
-
end
|
44
|
+
end
|
@@ -5,238 +5,236 @@ require 'libis/tools/assert'
|
|
5
5
|
require_relative 'parser/subfield_criteria_parser'
|
6
6
|
|
7
7
|
module Libis
|
8
|
-
module
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
end
|
8
|
+
module Metadata
|
9
|
+
|
10
|
+
# Helper class implementing a variable field for MARC
|
11
|
+
class VarField
|
12
|
+
|
13
|
+
attr_reader :tag
|
14
|
+
attr_reader :ind1
|
15
|
+
attr_reader :ind2
|
16
|
+
attr_reader :subfield_data
|
17
|
+
|
18
|
+
# Create new variable field with given tag and indicators
|
19
|
+
# @param [String] tag tag
|
20
|
+
# @param [String] ind1 first indicator. nil will be translated into empty string.
|
21
|
+
# @param [String] ind2 second indicator. nil will be translated into empty string.
|
22
|
+
def initialize(tag, ind1, ind2)
|
23
|
+
@tag = tag
|
24
|
+
@ind1 = ind1 || ''
|
25
|
+
@ind2 = ind2 || ''
|
26
|
+
@subfield_data = Hash.new {|h, k| h[k] = Array.new}
|
27
|
+
end
|
29
28
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
29
|
+
# Add subfield to variable field
|
30
|
+
# @param [String] name subfield indicator without '$'
|
31
|
+
# @param [String] value content of the subfield
|
32
|
+
def add_subfield(name, value)
|
33
|
+
@subfield_data[name] << value
|
34
|
+
end
|
36
35
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
36
|
+
# dump the contents
|
37
|
+
#
|
38
|
+
# @return [String] debug output to inspect the contents of the VarField
|
39
|
+
def dump
|
40
|
+
output = "#{@tag}:#{@ind1}:#{@ind2}:\n"
|
41
|
+
@subfield_data.each {|s, t| output += "\t#{s}:#{t}\n"}
|
42
|
+
output
|
43
|
+
end
|
45
44
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
45
|
+
# dump the contents
|
46
|
+
#
|
47
|
+
# @return [String] debug output to inspect the contents of the VarField - Single line version
|
48
|
+
def dump_line
|
49
|
+
output = "#{@tag}:#{@ind1}:#{@ind2}:"
|
50
|
+
@subfield_data.each {|s, t| output += "$#{s}#{t}"}
|
51
|
+
output
|
52
|
+
end
|
54
53
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
54
|
+
# list the subfield codes
|
55
|
+
#
|
56
|
+
# @return [Array] a list of all subfield codes
|
57
|
+
def keys
|
58
|
+
@subfield_data.keys
|
59
|
+
end
|
61
60
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
61
|
+
# get the first (or only) subfield value for the given code
|
62
|
+
#
|
63
|
+
# @return [String] the first or only entry of a subfield or nil if not present
|
64
|
+
# @param s [Character] the subfield code
|
65
|
+
def subfield(s)
|
66
|
+
subfield_array(s).first
|
67
|
+
end
|
69
68
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
69
|
+
# get a list of all subfield values for a given code
|
70
|
+
#
|
71
|
+
# @return [Array] all the entries of a repeatable subfield
|
72
|
+
# @param s [Character] the subfield code
|
73
|
+
def subfield_array(s)
|
74
|
+
assert(s.is_a?(String) && (s =~ /^[\da-z]$/) == 0, 'method expects a lower case alphanumerical char')
|
75
|
+
@subfield_data.has_key?(s) ? @subfield_data[s].dup : []
|
76
|
+
end
|
78
77
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
78
|
+
# get a list of the first subfield values for all the codes in the given string
|
79
|
+
#
|
80
|
+
# @return [Array] list of the first or only entries of all subfield codes in the input string
|
81
|
+
# @param s [String] subfield code specification (see match)
|
82
|
+
#
|
83
|
+
# The subfield codes are cleaned (see criteria_to_array)
|
84
|
+
def subfields(s)
|
85
|
+
assert(s.is_a?(String), 'method expects a string')
|
86
|
+
return [] unless (match_array = match(s))
|
87
|
+
criteria_to_array(match_array.join(' ')).collect {|i| send(:subfield, i)}.flatten.compact
|
88
|
+
end
|
90
89
|
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
90
|
+
# get a list of all the subfield values for all the codes in the given string
|
91
|
+
#
|
92
|
+
# @return [Array] list of all the entries of all subfield codes in the input string
|
93
|
+
# @param s [String] subfield code criteria (see match)
|
94
|
+
#
|
95
|
+
# The subfield codes are cleaned (see criteria_to_array)
|
96
|
+
|
97
|
+
def subfields_array(s)
|
98
|
+
assert(s.is_a?(String), 'method expects a string')
|
99
|
+
return [] unless (match_array = match(s))
|
100
|
+
criteria_to_array(match_array.join(' ')).collect {|i| send(:subfield_array, i)}.flatten.compact
|
101
|
+
end
|
103
102
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
end
|
103
|
+
# check if the current VarField matches the given subfield criteria.
|
104
|
+
#
|
105
|
+
# @return [Array<String>, nil] the matching part(s) of the criteria, or nil if no match
|
106
|
+
# @param criteria [String] subfield criteria: sequence of alternative sets of subfield codes that should or should not be present
|
107
|
+
#
|
108
|
+
# The subfield criteria consists of groups of characters. At least one of these groups should match for the test to succeed
|
109
|
+
# Within the group sets of codes may be divided by a hyphen (-). The first set of codes must all be present;
|
110
|
+
# the second set of codes must all <b>not</b> be present. Either set may be empty.
|
111
|
+
#
|
112
|
+
# Examples:
|
113
|
+
# 'ab' matches '$a...$b...' => ['ab']
|
114
|
+
# '$a...$b...$c...' => ['ab']
|
115
|
+
# but not '$a...' => nil # ($b missing)
|
116
|
+
# '$b...' => nil # ($a missing)
|
117
|
+
# 'a b' matches '$a...' => ['a']
|
118
|
+
# '$b...' => ['b']
|
119
|
+
# '$a...$b...' => ['a', 'b']
|
120
|
+
# '$a...$b...$c...' => ['a', 'b']
|
121
|
+
# but not '$c...' => nil # ($a or $b must be present)
|
122
|
+
# 'abc-d' matches '$a...$b...$c...' => ['abc-d']
|
123
|
+
# '$a...$b...$c...$e...' => ['abc-d']
|
124
|
+
# but not '$a...$b...$e...' => nil # ($c missing)
|
125
|
+
# '$a...$b...$c...$d...' => nil # ($d should not be present)
|
126
|
+
# 'a-b b-a' matches '$a...' => ['a-b']
|
127
|
+
# '$a...$c...' => ['a-b']
|
128
|
+
# '$b...' => ['b-a']
|
129
|
+
# '$b...$c...' => ['b-a']
|
130
|
+
# but not '$a...$b...' => nil
|
131
|
+
# 'a-b c-d' matches '$a...' => ['a-b']
|
132
|
+
# '$a...$c...' => ['a-b', 'c-d']
|
133
|
+
# '$a...$b...$c...' => ['c-d']
|
134
|
+
# '$b...$c...' => ['c-d']
|
135
|
+
# but not '$a...$b...' => nil
|
136
|
+
# '$c...$d...' => nil
|
137
|
+
# '$b...$c...$d...' => nil
|
138
|
+
# '$a...$b...$c...$d...' => nil
|
139
|
+
def match(criteria)
|
140
|
+
begin
|
141
|
+
parser = Libis::Metadata::Parser::SubfieldCriteriaParser.new
|
142
|
+
tree = parser.parse(criteria)
|
143
|
+
return [] if tree.is_a? String
|
144
|
+
tree = [tree] unless tree.is_a? Array
|
145
|
+
result = tree.map do |selection|
|
146
|
+
next unless parser.match_selection(selection, keys)
|
147
|
+
parser.selection_to_s(selection)
|
148
|
+
end.compact
|
149
|
+
return nil if result.empty?
|
150
|
+
result
|
151
|
+
rescue Parslet::ParseFailed => failure
|
152
|
+
failure.cause.set_label(criteria)
|
153
|
+
raise failure
|
156
154
|
end
|
155
|
+
end
|
157
156
|
|
158
|
-
|
157
|
+
private
|
159
158
|
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
159
|
+
# @return [Array] cleaned up version of the input string
|
160
|
+
# @param subfields [String] subfield code specification
|
161
|
+
# cleans the subfield code specification and splits it into an array of characters
|
162
|
+
# Duplicates will be removed from the array and the order will be untouched.
|
163
|
+
def criteria_to_array(subfields)
|
165
164
|
|
166
|
-
|
167
|
-
|
168
|
-
|
165
|
+
# note that we remove the '-xxx' part as it is only required for matching
|
166
|
+
subfields.gsub(/ |-\w*/, '').split('').uniq
|
167
|
+
end
|
169
168
|
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
169
|
+
def sort_helper(x)
|
170
|
+
# make sure that everything below 'A' is higher than 'z'
|
171
|
+
# note that this only works for numbers, but that is fine in our case.
|
172
|
+
x < 'A' ? (x.to_i + 123).chr : x
|
173
|
+
end
|
175
174
|
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
end
|
222
|
-
when 'a'
|
223
|
-
if subfields.size > 1
|
224
|
-
operation = :subfields_array
|
225
|
-
else
|
226
|
-
operation = :subfield_array
|
227
|
-
end
|
228
|
-
else
|
229
|
-
throw "Unknown method invocation: '#{name}' with: #{args}"
|
175
|
+
# implementation for methods for retrieving subfield values
|
176
|
+
#
|
177
|
+
# The methods start with a single character: the operation
|
178
|
+
# 'f' for retrieving only the first occurrence of the subfield
|
179
|
+
# 'a' for retrieving all the subfield values for each of the given subfields
|
180
|
+
# if omitted, 'f' is assumed
|
181
|
+
#
|
182
|
+
# Then a '_' acts as a subdivider between the operation and the subfield(s). It must always be present, even
|
183
|
+
# if the operation is omitted.
|
184
|
+
#
|
185
|
+
# The last part is a sequence of subfield codes that should be used for selecting the values. The order in which the
|
186
|
+
# subfields are listed is respected in the resulting array of values.
|
187
|
+
#
|
188
|
+
# Examples:
|
189
|
+
#
|
190
|
+
# t = VarField.new('100', '', '',
|
191
|
+
# { 'a' => %w'Name NickName',
|
192
|
+
# 'b' => %w'LastName MaidenName',
|
193
|
+
# 'c' => %w'eMail',
|
194
|
+
# '1' => %w'Age',
|
195
|
+
# '9' => %w'Score'})
|
196
|
+
#
|
197
|
+
# # >> 100##$aName$aNickName$bLastName$bMaidenName$ceMail$1Age$9Score <<
|
198
|
+
#
|
199
|
+
# t._1ab => ['Age', 'Name', 'LastName']
|
200
|
+
# # equivalent to: t.f_1ab or t.subfields('1ab')
|
201
|
+
#
|
202
|
+
# t.a_9ab => ['Score', 'Name', 'NickName', 'LastName', 'MaidenName']
|
203
|
+
# # equivalent to: t.subfields_array('9ab')
|
204
|
+
#
|
205
|
+
# Note that it is not possible to use a fieldspec for the sequence of subfield codes. Spaces and '-' are not allowed
|
206
|
+
# in method calls. If you want this, use the #subfield(s) and #subfield(s)_array methods.
|
207
|
+
#
|
208
|
+
def method_missing(name, *args)
|
209
|
+
operation, subfields = name.to_s.split('_')
|
210
|
+
assert(subfields.size > 0, 'need to specify at least one subfield')
|
211
|
+
operation = 'f' if operation.empty?
|
212
|
+
# convert subfield list to fieldspec
|
213
|
+
subfields = subfields.split('').join(' ')
|
214
|
+
case operation
|
215
|
+
when 'f'
|
216
|
+
if subfields.size > 1
|
217
|
+
operation = :subfields
|
218
|
+
else
|
219
|
+
operation = :subfield
|
230
220
|
end
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
221
|
+
when 'a'
|
222
|
+
if subfields.size > 1
|
223
|
+
operation = :subfields_array
|
224
|
+
else
|
225
|
+
operation = :subfield_array
|
226
|
+
end
|
227
|
+
else
|
228
|
+
throw "Unknown method invocation: '#{name}' with: #{args}"
|
236
229
|
end
|
230
|
+
send(operation, subfields)
|
231
|
+
end
|
237
232
|
|
233
|
+
def to_ary
|
234
|
+
nil
|
238
235
|
end
|
239
236
|
|
240
237
|
end
|
238
|
+
|
241
239
|
end
|
242
|
-
end
|
240
|
+
end
|