libis-tools 0.9.9 → 0.9.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +4 -2
- data/README.md +19 -0
- data/lib/libis/tools.rb +1 -0
- data/lib/libis/tools/config_file.rb +1 -1
- data/lib/libis/tools/extend/kernel.rb +16 -0
- data/lib/libis/tools/metadata.rb +20 -0
- data/lib/libis/tools/metadata/dublin_core_record.rb +1 -1
- data/lib/libis/tools/metadata/{field_spec.rb → field_format.rb} +7 -7
- data/lib/libis/tools/metadata/fix_field.rb +6 -1
- data/lib/libis/tools/metadata/mapper.rb +80 -0
- data/lib/libis/tools/metadata/mappers/flandrica.rb +69 -0
- data/lib/libis/tools/metadata/mappers/kuleuven.rb +1702 -0
- data/lib/libis/tools/metadata/marc21_record.rb +5 -4
- data/lib/libis/tools/metadata/marc_record.rb +96 -37
- data/lib/libis/tools/metadata/parser/basic_parser.rb +118 -0
- data/lib/libis/tools/metadata/parser/dublin_core_parser.rb +36 -0
- data/lib/libis/tools/metadata/parser/marc21_parser.rb +206 -0
- data/lib/libis/tools/metadata/parser/marc_format_parser.rb +52 -0
- data/lib/libis/tools/metadata/parser/marc_rules.rb +35 -0
- data/lib/libis/tools/metadata/parser/marc_select_parser.rb +25 -0
- data/lib/libis/tools/metadata/parser/patch.rb +21 -0
- data/lib/libis/tools/metadata/parser/subfield_criteria_parser.rb +71 -0
- data/lib/libis/tools/metadata/parsers.rb +12 -0
- data/lib/libis/tools/metadata/var_field.rb +57 -47
- data/lib/libis/tools/parameter.rb +12 -2
- data/lib/libis/tools/version.rb +1 -1
- data/libis-tools.gemspec +4 -3
- data/spec/config_spec.rb +3 -1
- data/spec/data/MetadataMapping.xlsx +0 -0
- data/spec/metadata/8389207.marc +96 -0
- data/spec/metadata/dublin_core_parser_spec.rb +48 -0
- data/spec/metadata/marc21_parser_data.rb +382 -0
- data/spec/metadata/marc21_parser_spec.rb +67 -0
- data/spec/metadata/marc21_spec.rb +116 -0
- data/spec/metadata/metadata_mapper_spec.rb +23 -0
- data/spec/spec_helper.rb +13 -0
- data/test.rb +61 -0
- metadata +77 -7
- data/lib/libis/tools/dc_record.rb +0 -47
@@ -0,0 +1,52 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'parslet'
|
4
|
+
|
5
|
+
require_relative 'basic_parser'
|
6
|
+
require_relative 'marc_rules'
|
7
|
+
|
8
|
+
module Libis
|
9
|
+
module Tools
|
10
|
+
module Metadata
|
11
|
+
|
12
|
+
# noinspection RubyResolve
|
13
|
+
# Parslet grammar for MARC format mappings: one or more entries (subfield
# references or nested groups, each with an optional literal prefix) followed
# by an optional literal postfix.
#
# Atoms such as +text+, +grouped+, +star+, +dquote+, +squote+, +not_dquote+,
# +not_squote+, +lsparen+, +rsparen+, +integer+, +minus+, +paren+ and +dollar+
# are not defined in this class; presumably they are supplied by BasicParser -
# verify there.
class MarcFormatParser < Libis::Tools::Metadata::BasicParser
  include Libis::Tools::Metadata::MarcRules

  root(:mapping)

  # One or more entries, captured as :entry, then the optional :postfix.
  rule(:mapping) { entry.repeat(1).as(:entry) >> postfix?.as(:postfix) }

  # An entry is either a group (captured as :group) or a subfield reference.
  rule(:entry) { group.as(:group) | sf_reference }
  # A group is an optional :prefix followed by a grouped (nested) mapping.
  rule(:group) { prefix?.as(:prefix) >> grouped(mapping) }

  # pre- and postfix
  #
  # Both rules used to be declared twice (first as `other.repeat(1)`, then as
  # `text`). A later `rule` declaration replaces an earlier one with the same
  # name, so only the effective `text` declarations are kept.
  rule(:prefix) { text }
  rule(:prefix?) { prefix.maybe }
  rule(:postfix) { text }
  rule(:postfix?) { postfix.maybe }

  # subfield reference: a variable subfield (:subfield) or a fixed field part (:fixfield)
  rule(:sf_reference) { sf_variable.as(:subfield) | sf_fixed.as(:fixfield) }

  # optional :prefix, subfield indicator, optional :repeat marker, subfield name
  rule(:sf_variable) { prefix?.as(:prefix) >> sf_indicator >> sf_repeat?.as(:repeat) >> sf_name }
  # repeat marker: a star, optionally followed by a double- or single-quoted :separator
  rule(:sf_repeat) { star >>
      (dquote >> not_dquote.repeat.as(:separator) >> dquote |
          squote >> not_squote.repeat.as(:separator) >> squote
      ).maybe
  }
  rule(:sf_repeat?) { sf_repeat.maybe }

  # optional :prefix, subfield indicator, '@', then a position, a range or a star
  rule(:sf_fixed) { prefix?.as(:prefix) >> sf_indicator >> str('@') >> (sf_position | sf_range | sf_star) }
  # single :position between lsparen and rsparen
  rule(:sf_position) { lsparen >> integer.as(:position) >> rsparen }
  # :first and :last positions separated by minus, between lsparen and rsparen
  rule(:sf_range) { lsparen >> integer.as(:first) >> minus >> integer.as(:last) >> rsparen }
  # star captured as :all
  rule(:sf_star) { star.as(:all) }

  # Any single character that is neither a paren nor a dollar, or the literal '$$'.
  # No longer referenced by the prefix/postfix rules; kept for compatibility.
  rule(:other) { paren.absent? >> dollar.absent? >> any | str('$$') }
end
|
49
|
+
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'parslet'
|
4
|
+
|
5
|
+
module Libis
|
6
|
+
module Tools
|
7
|
+
module Metadata
|
8
|
+
|
9
|
+
# noinspection RubyResolve
|
10
|
+
# Shared Parslet rules for MARC tags, indicators and subfields.
#
# The atoms +number+, +character+, +hashtag+, +underscore+ and +dollar+ are not
# defined in this module; the including parser is expected to provide them
# (presumably BasicParser - verify there).
module MarcRules
  include Parslet

  # tag: exactly three +number+ atoms or exactly three +character+ atoms
  rule(:tag_numeric) do
    number.repeat(3, 3)
  end
  rule(:tag_alpha) do
    character.repeat(3, 3)
  end
  rule(:tag) do
    tag_numeric | tag_alpha
  end

  # indicator: a single hashtag, underscore, number or character
  rule(:indicator) do
    hashtag | underscore | number | character
  end
  rule(:indicator?) do
    indicator.maybe
  end
  # two optional indicators, captured as :ind1 and :ind2
  rule(:indicators) do
    first_indicator = indicator?.as(:ind1)
    second_indicator = indicator?.as(:ind2)
    first_indicator >> second_indicator
  end

  # subfield: the indicator atom followed by a one-symbol name (captured as :name)
  rule(:sf_indicator) do
    dollar
  end
  rule(:sf_name) do
    (character | number).as(:name)
  end
  rule(:sf_name?) do
    sf_name.maybe
  end
  # one or more name symbols, captured together as :names
  rule(:sf_names) do
    (character | number).repeat(1).as(:names)
  end
  rule(:sf_names?) do
    sf_names.maybe
  end
  rule(:subfield) do
    sf_indicator >> sf_name
  end

end
|
32
|
+
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'parslet'
|
4
|
+
|
5
|
+
require_relative 'basic_parser'
|
6
|
+
require_relative 'marc_rules'
|
7
|
+
|
8
|
+
module Libis
|
9
|
+
module Tools
|
10
|
+
module Metadata
|
11
|
+
|
12
|
+
# noinspection RubyResolve
|
13
|
+
# Parslet grammar for a MARC selector: the literal 'MARC', a tag, the
# indicators and an optional subfield, with optional spaces in between.
# +spaces?+ is not defined here (presumably provided by BasicParser - verify).
class MarcSelectParser < Libis::Tools::Metadata::BasicParser
  include Libis::Tools::Metadata::MarcRules

  root(:MARC)

  rule(:MARC) do
    marker = str('MARC')
    tag_part = tag.as(:tag)
    subfield_part = subfield.maybe.as(:subfield)

    marker >> spaces? >> tag_part >> spaces? >> indicators >> spaces? >> subfield_part
  end

  # subfield
  # rule(:sf_condition) { sf_indicator >> sf_names >> (space >> sf_names).repeat }
  # rule(:sf_names) { sf_name.repeat(1) }
end
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# Patch of Parslet::Pattern that lets hash patterns declare optional keys:
# a pattern key ending in '?' may be absent from the matched tree.
class Parslet::Pattern

  # Match a tree hash against a pattern hash.
  #
  # @param tree [Hash] the (sub)tree to match
  # @param exp [Hash] the expected pattern; keys ending in '?' are optional
  # @param bindings [Hash] passed through to element_match for every value
  # @return [Boolean] true when every required key, and every optional key
  #   present in the tree, matches its pattern value
  def element_match_hash(tree, exp, bindings)
    # The pattern may hold more keys than the tree (optional ones), never fewer.
    return false if exp.size < tree.size

    exp.each do |expected_key, expected_value|
      optional = /^(.*)\?$/.match(expected_key.to_s)
      if optional
        # Strip the '?' marker, keeping the key type (Symbol or String).
        bare_key = optional[1]
        expected_key = expected_key.is_a?(Symbol) ? bare_key.to_sym : bare_key
        # An absent optional key is fine, but the remaining pattern keys must
        # still be checked and bound (this used to return true right away,
        # skipping every key that followed).
        next unless tree.has_key? expected_key
      end

      return false unless tree.has_key? expected_key

      # Recurse into the value and stop early on failure
      value = tree[expected_key]
      return false unless element_match(value, expected_value, bindings)
    end

    true
  end

end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'parslet'
|
4
|
+
|
5
|
+
require_relative 'basic_parser'
|
6
|
+
|
7
|
+
module Libis
|
8
|
+
module Tools
|
9
|
+
module Metadata
|
10
|
+
|
11
|
+
# noinspection RubyResolve
|
12
|
+
# Parslet grammar and matcher for subfield criteria.
#
# Grammar as declared by the rules below:
#   criteria    := selection (spaces selection)*
#   selection   := must must_not?
#   must        := names? (one_of | only_one_of)?
#   must_not    := minus must            (captured as :not)
#   one_of      := lrparen names rrparen (captured as :one_of)
#   only_one_of := lcparen names rcparen (captured as :only_one_of)
#
# The atoms +spaces+, +minus+, +lrparen+, +rrparen+, +lcparen+, +rcparen+,
# +character+ and +number+ are not defined here (presumably provided by
# BasicParser - verify there).
class SubfieldCriteriaParser < Libis::Tools::Metadata::BasicParser

  root(:criteria)

  rule(:criteria) { selection >> (spaces >> selection).repeat }

  rule(:selection) { must >> must_not.maybe }

  rule(:must) { names.as(:must).maybe >> (one_of | only_one_of).maybe }
  rule(:must_not) { minus >> must.as(:not) }

  rule(:one_of) { lrparen >> names.as(:one_of) >> rrparen }
  rule(:only_one_of) { lcparen >> names.as(:only_one_of) >> rcparen }

  rule(:names) { (character | number).repeat(1) }

  # Render parsed criteria back into a string.
  #
  # @param criteria [Array, Hash, Object] a list of selections, one selection,
  #   or anything else (returned untouched)
  # @return [String, Object] the selections rendered with selection_to_s and
  #   joined by a single space; non Array/Hash input is returned as is
  def criteria_to_s(criteria)
    selections =
        case criteria
        when Array
          criteria
        when Hash
          [criteria]
        else
          return criteria
        end
    selections.map { |selection| selection_to_s(selection) }.join(' ')
  end

  # Render a single parsed selection back into a string.
  #
  # @param selection [Hash, Object] parsed selection; non-Hash input is returned as is
  # @return [String, Object] the :must part, then "(one_of)", "{only_one_of}"
  #   and "-not" parts for each key that is set
  def selection_to_s(selection)
    return selection unless selection.is_a? Hash
    result = "#{selection[:must]}"
    result += "(#{selection[:one_of]})" if selection[:one_of]
    result += "{#{selection[:only_one_of]}}" if selection[:only_one_of]
    result += "-#{selection_to_s(selection[:not])}" if selection[:not]
    result
  end

  # Parse the criteria and check whether any of its selections matches the data.
  #
  # @param criteria [String] the criteria text to parse
  # @param data [Array] the codes to test against
  # @return [Boolean] true when the parse result is a plain String or when at
  #   least one selection matches
  def match_criteria(criteria, data)
    # Parse with this parser instance; the former `self.new` is not available
    # on an instance and always raised NoMethodError.
    tree = parse(criteria)
    return true if tree.is_a? String
    tree = [tree] unless tree.is_a? Array
    tree.any? { |selection| match_selection(selection, data) }
  end

  # Check a single parsed selection against the data.
  #
  # @param selection [Hash] parsed selection with optional :must, :one_of,
  #   :only_one_of and :not entries
  # @param data [Array] the codes to test against; intersected with the
  #   single-character codes of each entry
  # @return [Boolean] true when all :must codes are present, at least one
  #   :one_of code is present, exactly one :only_one_of code is present and
  #   the :not selection does not match
  def match_selection(selection, data)
    must_match = selection[:must].to_s.split('')
    return false unless must_match == (must_match & data)

    one_of = selection[:one_of].to_s.split('')
    return false unless one_of.empty? || (one_of & data).any?

    # Exactly one of these codes must be present (the check used to be inverted).
    only_one_of = selection[:only_one_of].to_s.split('')
    return false unless only_one_of.empty? || (only_one_of & data).size == 1

    return false if selection[:not] && match_selection(selection[:not], data)

    true
  end

end
|
68
|
+
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Libis
  module Tools
    module Metadata

      # Register the parser classes for lazy loading: each constant is mapped
      # to its file below 'libis/tools/metadata/parser/'.
      {
          BasicParser: 'basic_parser',
          DublinCoreParser: 'dublin_core_parser',
          Marc21Parser: 'marc21_parser',
          SubfieldCriteriaParser: 'subfield_criteria_parser'
      }.each do |const_name, file_name|
        autoload const_name, "libis/tools/metadata/parser/#{file_name}"
      end

    end
  end
end
|
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
require 'libis/tools/assert'
|
4
4
|
|
5
|
+
require_relative 'parser/subfield_criteria_parser'
|
6
|
+
|
5
7
|
module Libis
|
6
8
|
module Tools
|
7
9
|
module Metadata
|
@@ -11,13 +13,17 @@ module Libis
|
|
11
13
|
attr_reader :tag
|
12
14
|
attr_reader :ind1
|
13
15
|
attr_reader :ind2
|
14
|
-
attr_reader :
|
16
|
+
attr_reader :subfield_data
|
15
17
|
|
16
|
-
def initialize(tag, ind1, ind2
|
18
|
+
# Create a field without any subfield values.
#
# @param tag the field tag, stored as given
# @param ind1 first indicator; a nil/false value is stored as ''
# @param ind2 second indicator; a nil/false value is stored as ''
def initialize(tag, ind1, ind2)
  # every unknown subfield code lazily gets its own empty value list
  @subfield_data = Hash.new { |store, code| store[code] = Array.new }
  @tag = tag
  @ind1 = ind1 ? ind1 : ''
  @ind2 = ind2 ? ind2 : ''
end
|
24
|
+
|
25
|
+
# Append a value to the list kept for the given subfield code.
#
# @param name the subfield code
# @param value the value to append
# @return [Array] the value list of that subfield code after appending
def add_subfield(name, value)
  values = @subfield_data[name]
  values.push(value)
end
|
22
28
|
|
23
29
|
# dump the contents
|
@@ -25,7 +31,7 @@ module Libis
|
|
25
31
|
# @return [String] debug output to inspect the contents of the VarField
|
26
32
|
def dump
  # header line "tag:ind1:ind2:", then one tab-indented "code:values" line per subfield code
  header = "#{@tag}:#{@ind1}:#{@ind2}:\n"
  @subfield_data.inject(header) do |text, (code, values)|
    text + "\t#{code}:#{values}\n"
  end
end
|
31
37
|
|
@@ -34,7 +40,7 @@ module Libis
|
|
34
40
|
# @return [String] debug output to inspect the contents of the VarField - Single line version
|
35
41
|
def dump_line
  # "tag:ind1:ind2:" immediately followed by "$code" + values for every subfield code
  parts = @subfield_data.map { |code, values| "$#{code}#{values}" }
  "#{@tag}:#{@ind1}:#{@ind2}:" + parts.join
end
|
40
46
|
|
@@ -42,57 +48,57 @@ module Libis
|
|
42
48
|
#
|
43
49
|
# @return [Array] a list of all subfield codes
|
44
50
|
def keys
  # the codes for which subfield data is stored, in insertion order
  @subfield_data.each_key.to_a
end
|
47
53
|
|
48
54
|
# get the first (or only) subfield value for the given code
|
49
55
|
#
|
50
56
|
# @return [String] the first or only entry of a subfield or nil if not present
|
51
57
|
# @param s [Character] the subfield code
|
52
|
-
def
|
53
|
-
|
58
|
+
def subfield(s)
  # first entry of the value list, nil when the list is empty
  values = subfield_array(s)
  values[0]
end
|
55
61
|
|
56
62
|
# get a list of all subfield values for a given code
|
57
63
|
#
|
58
64
|
# @return [Array] all the entries of a repeatable subfield
|
59
65
|
# @param s [Character] the subfield code
|
60
|
-
def
|
66
|
+
def subfield_array(s)
  valid_code = s.is_a?(String) && (s =~ /^[\da-z]$/) == 0
  assert(valid_code, 'method expects a lower case alphanumerical char')
  # fetch (rather than []) so that looking up an unknown code does not create
  # an entry; the caller always receives a copy of the stored list
  @subfield_data.fetch(s, []).dup
end
|
64
70
|
|
65
|
-
# get a list of the first subfield
|
71
|
+
# get a list of the first subfield values for all the codes in the given string
|
66
72
|
#
|
67
73
|
# @return [Array] list of the first or only entries of all subfield codes in the input string
|
68
|
-
# @param s [String] subfield code specification (see
|
74
|
+
# @param s [String] subfield code specification (see match)
|
69
75
|
#
|
70
|
-
# The subfield codes are cleaned
|
71
|
-
def
|
76
|
+
# The subfield codes are cleaned (see criteria_to_array)
|
77
|
+
def subfields(s)
  assert(s.is_a?(String), 'method expects a string')
  matched = match(s)
  return [] unless matched
  # reduce the matching criteria to a list of codes and take the first value of each
  codes = criteria_to_array(matched.join(' '))
  codes.collect { |code| subfield(code) }.flatten.compact
end
|
76
82
|
|
77
83
|
# get a list of all the subfield values for all the codes in the given string
|
78
84
|
#
|
79
85
|
# @return [Array] list of the all the entries of all subfield codes in the input string
|
80
|
-
# @param s [String] subfield code
|
86
|
+
# @param s [String] subfield code criteria (see match)
|
81
87
|
#
|
82
|
-
# The subfield codes are cleaned
|
88
|
+
# The subfield codes are cleaned (see criteria_to_array)
|
83
89
|
|
84
|
-
def
|
90
|
+
def subfields_array(s)
  assert(s.is_a?(String), 'method expects a string')
  matched = match(s)
  return [] unless matched
  # reduce the matching criteria to a list of codes and gather all values of each
  codes = criteria_to_array(matched.join(' '))
  codes.collect { |code| subfield_array(code) }.flatten.compact
end
|
89
95
|
|
90
|
-
# check if the current VarField matches the given
|
96
|
+
# check if the current VarField matches the given subfield criteria.
|
91
97
|
#
|
92
|
-
# @return [String] The matching part(s) of the
|
93
|
-
# @param
|
98
|
+
# @return [Array<String>, nil] the matching part(s) of the criteria, or nil if there is no match
|
99
|
+
# @param criteria [String] subfield criteria: a sequence of alternative sets of subfield codes that should or should not be present
|
94
100
|
#
|
95
|
-
# The
|
101
|
+
# The subfield criteria consists of groups of characters. At least one of these groups should match for the test to succeed
|
96
102
|
# Within the group sets of codes may be divided by a hyphen (-). The first set of codes must all be present;
|
97
103
|
# the second set of codes must all <b>not</b> be present. Either set may be empty.
|
98
104
|
#
|
@@ -123,30 +129,34 @@ module Libis
|
|
123
129
|
# '$c...$d...' => nil
|
124
130
|
# '$b...$c...$d...' => nil
|
125
131
|
# '$a...$b...$c...$d...' => nil
|
126
|
-
def
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
132
|
+
def match(criteria)
  parser = Libis::Tools::Metadata::SubfieldCriteriaParser.new
  tree = parser.parse(criteria)
  # a plain String parse result yields an empty list
  return [] if tree.is_a? String
  selections = tree.is_a?(Array) ? tree : [tree]
  # keep the selections that match the subfield codes of this field, rendered as text
  matching = selections.select { |selection| parser.match_selection(selection, keys) }
  rendered = matching.map { |selection| parser.selection_to_s(selection) }.compact
  rendered.empty? ? nil : rendered
rescue Parslet::ParseFailed => failure
  # label the failure with the offending criteria before passing it on
  failure.cause.set_label(criteria)
  raise failure
end
|
139
149
|
|
140
150
|
private
|
141
151
|
|
142
152
|
# @return [Array] cleaned up version of the input string
|
143
|
-
# @param
|
153
|
+
# @param subfields [String] subfield code specification
|
144
154
|
# cleans the subfield code specification and splits it into an array of characters
|
145
155
|
# Duplicates will be removed from the array and the order will be untouched.
|
146
|
-
def
|
156
|
+
def criteria_to_array(subfields)

  # note that we remove the '-xxx' part as it is only required for matching;
  # the negative part may also carry a '(xxx)' or '{xxx}' group, which has to
  # go with it
  positive = subfields.gsub(/-\w*(?:\(\w*\)|\{\w*\})?/, '')
  # what remains are the wanted codes, possibly wrapped in '(..)' or '{..}':
  # drop spaces and brackets so that only subfield codes are left
  positive.gsub(/[ (){}]/, '').split('').uniq
end
|
151
161
|
|
152
162
|
def sort_helper(x)
|
@@ -186,7 +196,7 @@ module Libis
|
|
186
196
|
# # equivalent to: t.fields_array('9ab')
|
187
197
|
#
|
188
198
|
# Note that it is not possible to use a fieldspec for the sequence of subfield codes. Spaces and '-' are not allowed
|
189
|
-
# in method calls. If you want this, use the #
|
199
|
+
# in method calls. If you want this, use the #subfield(s) and #subfield(s)_array methods.
|
190
200
|
#
|
191
201
|
def method_missing(name, *args)
|
192
202
|
operation, subfields = name.to_s.split('_')
|
@@ -197,15 +207,15 @@ module Libis
|
|
197
207
|
case operation
|
198
208
|
when 'f'
|
199
209
|
if subfields.size > 1
|
200
|
-
operation = :
|
210
|
+
operation = :subfields
|
201
211
|
else
|
202
|
-
operation = :
|
212
|
+
operation = :subfield
|
203
213
|
end
|
204
214
|
when 'a'
|
205
215
|
if subfields.size > 1
|
206
|
-
operation = :
|
216
|
+
operation = :subfields_array
|
207
217
|
else
|
208
|
-
operation = :
|
218
|
+
operation = :subfield_array
|
209
219
|
end
|
210
220
|
else
|
211
221
|
throw "Unknown method invocation: '#{name}' with: #{args}"
|
@@ -73,6 +73,10 @@ module Libis
|
|
73
73
|
'float'
|
74
74
|
when DateTime, Date, Time
|
75
75
|
'datetime'
|
76
|
+
when Array
|
77
|
+
'array'
|
78
|
+
when Hash
|
79
|
+
'hash'
|
76
80
|
else
|
77
81
|
send(:default).class.name.downcase
|
78
82
|
end
|
@@ -95,9 +99,17 @@ module Libis
|
|
95
99
|
when 'datetime'
|
96
100
|
return v.to_datetime if v.respond_to? :to_datetime
|
97
101
|
return DateTime.parse(v)
|
102
|
+
when 'array'
|
103
|
+
return v if v.is_a?(Array)
|
104
|
+
return v.split(/[,;|\s]+/) if v.is_a?(String)
|
105
|
+
return v.to_a if v.respond_to?(:to_a)
|
106
|
+
when 'hash'
|
107
|
+
return v when v.is_a?(Hash)
|
108
|
+
return Hash[(0...v.size).zip(v)] when v.is_a?(Array)
|
98
109
|
else
|
99
110
|
raise RuntimeError, "Datatype not supported: '#{dtype}'"
|
100
111
|
end
|
112
|
+
nil
|
101
113
|
end
|
102
114
|
|
103
115
|
def check_constraint(v, constraint = nil)
|
@@ -145,8 +157,6 @@ module Libis
|
|
145
157
|
end
|
146
158
|
end
|
147
159
|
|
148
|
-
protected
|
149
|
-
|
150
160
|
def parameters
|
151
161
|
@parameters ||= Hash.new
|
152
162
|
end
|