libis-tools 0.9.9 → 0.9.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +4 -2
- data/README.md +19 -0
- data/lib/libis/tools.rb +1 -0
- data/lib/libis/tools/config_file.rb +1 -1
- data/lib/libis/tools/extend/kernel.rb +16 -0
- data/lib/libis/tools/metadata.rb +20 -0
- data/lib/libis/tools/metadata/dublin_core_record.rb +1 -1
- data/lib/libis/tools/metadata/{field_spec.rb → field_format.rb} +7 -7
- data/lib/libis/tools/metadata/fix_field.rb +6 -1
- data/lib/libis/tools/metadata/mapper.rb +80 -0
- data/lib/libis/tools/metadata/mappers/flandrica.rb +69 -0
- data/lib/libis/tools/metadata/mappers/kuleuven.rb +1702 -0
- data/lib/libis/tools/metadata/marc21_record.rb +5 -4
- data/lib/libis/tools/metadata/marc_record.rb +96 -37
- data/lib/libis/tools/metadata/parser/basic_parser.rb +118 -0
- data/lib/libis/tools/metadata/parser/dublin_core_parser.rb +36 -0
- data/lib/libis/tools/metadata/parser/marc21_parser.rb +206 -0
- data/lib/libis/tools/metadata/parser/marc_format_parser.rb +52 -0
- data/lib/libis/tools/metadata/parser/marc_rules.rb +35 -0
- data/lib/libis/tools/metadata/parser/marc_select_parser.rb +25 -0
- data/lib/libis/tools/metadata/parser/patch.rb +21 -0
- data/lib/libis/tools/metadata/parser/subfield_criteria_parser.rb +71 -0
- data/lib/libis/tools/metadata/parsers.rb +12 -0
- data/lib/libis/tools/metadata/var_field.rb +57 -47
- data/lib/libis/tools/parameter.rb +12 -2
- data/lib/libis/tools/version.rb +1 -1
- data/libis-tools.gemspec +4 -3
- data/spec/config_spec.rb +3 -1
- data/spec/data/MetadataMapping.xlsx +0 -0
- data/spec/metadata/8389207.marc +96 -0
- data/spec/metadata/dublin_core_parser_spec.rb +48 -0
- data/spec/metadata/marc21_parser_data.rb +382 -0
- data/spec/metadata/marc21_parser_spec.rb +67 -0
- data/spec/metadata/marc21_spec.rb +116 -0
- data/spec/metadata/metadata_mapper_spec.rb +23 -0
- data/spec/spec_helper.rb +13 -0
- data/test.rb +61 -0
- metadata +77 -7
- data/lib/libis/tools/dc_record.rb +0 -47
@@ -0,0 +1,52 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'parslet'
|
4
|
+
|
5
|
+
require_relative 'basic_parser'
|
6
|
+
require_relative 'marc_rules'
|
7
|
+
|
8
|
+
module Libis
  module Tools
    module Metadata

      # Parslet grammar for MARC format (mapping) expressions.
      #
      # Grammar overview, as expressed by the rules below:
      #
      # - a mapping is one or more entries followed by an optional postfix;
      # - an entry is either a group (optional prefix followed by a grouped, nested mapping)
      #   or a subfield reference;
      # - a variable subfield reference is: optional prefix, subfield indicator, optional repeat
      #   marker (star, optionally followed by a single- or double-quoted separator), subfield name;
      # - a fixed field reference is: optional prefix, subfield indicator, '@', and then a position,
      #   a first-last range or a star.
      #
      # Atoms such as +text+, +grouped+, +star+, +dquote+, +squote+, +lsparen+, +rsparen+, +integer+,
      # +minus+, +paren+ and +dollar+ are not defined in this file; presumably they are provided by
      # BasicParser - confirm there.
      #
      # noinspection RubyResolve
      class MarcFormatParser < Libis::Tools::Metadata::BasicParser
        include Libis::Tools::Metadata::MarcRules

        root(:mapping)

        rule(:mapping) { entry.repeat(1).as(:entry) >> postfix?.as(:postfix) }

        rule(:entry) { group.as(:group) | sf_reference }
        rule(:group) { prefix?.as(:prefix) >> grouped(mapping) }

        # pre- and postfix
        #
        # Each of :prefix and :postfix used to be declared twice (first as `other.repeat(1)`, then as
        # `text`). A later `rule` call with the same name replaces the earlier one, so only the `text`
        # variants were ever effective; the dead declarations have been removed.
        rule(:prefix) { text }
        rule(:prefix?) { prefix.maybe }
        rule(:postfix) { text }
        rule(:postfix?) { postfix.maybe }

        # subfield reference
        rule(:sf_reference) { sf_variable.as(:subfield) | sf_fixed.as(:fixfield) }

        # variable subfield: the repeat marker may carry a quoted separator, captured as :separator
        rule(:sf_variable) { prefix?.as(:prefix) >> sf_indicator >> sf_repeat?.as(:repeat) >> sf_name }
        rule(:sf_repeat) { star >>
            (dquote >> not_dquote.repeat.as(:separator) >> dquote |
                squote >> not_squote.repeat.as(:separator) >> squote
            ).maybe
        }
        rule(:sf_repeat?) { sf_repeat.maybe }

        # fixed field: a single position, a first-last range, or everything (star)
        rule(:sf_fixed) { prefix?.as(:prefix) >> sf_indicator >> str('@') >> (sf_position | sf_range | sf_star) }
        rule(:sf_position) { lsparen >> integer.as(:position) >> rsparen }
        rule(:sf_range) { lsparen >> integer.as(:first) >> minus >> integer.as(:last) >> rsparen }
        rule(:sf_star) { star.as(:all) }

        # any single character that is neither a paren nor a dollar, or the literal '$$'.
        # Kept for compatibility; it is no longer referenced by the rules in this class.
        rule(:other) { paren.absent? >> dollar.absent? >> any | str('$$') }
      end

    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'parslet'
|
4
|
+
|
5
|
+
module Libis
  module Tools
    module Metadata

      # Shared Parslet rules for MARC tags, indicators and subfields.
      #
      # Intended to be mixed into parser classes (it is included by the MARC parsers in this
      # namespace). The atoms +number+, +character+, +hashtag+, +underscore+ and +dollar+ are not
      # defined here; presumably the including parser supplies them - confirm against BasicParser.
      #
      # noinspection RubyResolve
      module MarcRules
        include Parslet

        # --- tag: exactly three numbers or exactly three characters ---
        rule(:tag) { tag_numeric | tag_alpha }
        rule(:tag_numeric) { number.repeat(3, 3) }
        rule(:tag_alpha) { character.repeat(3, 3) }

        # --- indicator: two optional positions, captured as :ind1 and :ind2 ---
        rule(:indicator) { hashtag | underscore | number | character }
        rule(:indicator?) { indicator.maybe }
        rule(:indicators) { indicator?.as(:ind1) >> indicator?.as(:ind2) }

        # --- subfield: the indicator atom followed by a one-symbol name ---
        rule(:sf_indicator) { dollar }

        # a single subfield code, captured as :name
        rule(:sf_name) { (character | number).as(:name) }
        rule(:sf_name?) { sf_name.maybe }

        # one or more subfield codes, captured together as :names
        rule(:sf_names) { (character | number).repeat(1).as(:names) }
        rule(:sf_names?) { sf_names.maybe }

        rule(:subfield) { sf_indicator >> sf_name }

      end

    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'parslet'
|
4
|
+
|
5
|
+
require_relative 'basic_parser'
|
6
|
+
require_relative 'marc_rules'
|
7
|
+
|
8
|
+
module Libis
  module Tools
    module Metadata

      # Parslet grammar for a MARC selection expression.
      #
      # Accepted shape: the literal 'MARC', a tag (captured as :tag), the two optional indicators
      # (captured as :ind1 / :ind2 by the shared +indicators+ rule) and an optional subfield
      # (captured as :subfield). Each part may be preceded by whatever +spaces?+ accepts; that rule
      # is not defined in this file - presumably it comes from BasicParser.
      #
      # noinspection RubyResolve
      class MarcSelectParser < Libis::Tools::Metadata::BasicParser
        include Libis::Tools::Metadata::MarcRules

        root(:MARC)

        rule(:MARC) do
          str('MARC') >>
              spaces? >> tag.as(:tag) >>
              spaces? >> indicators >>
              spaces? >> subfield.maybe.as(:subfield)
        end

        # subfield (disabled alternatives, kept for reference)
        # rule(:sf_condition) { sf_indicator >> sf_names >> (space >> sf_names).repeat }
        # rule(:sf_names) { sf_name.repeat(1) }
      end

    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
class Parslet::Pattern
|
2
|
+
|
3
|
+
# Match a hash +tree+ against the hash pattern +exp+, with support for optional pattern keys.
#
# A pattern key whose name ends in '?' (e.g. +:name?+ or +'name?'+) is optional: when the tree
# lacks the corresponding key (the name without the '?'), that entry is skipped; when the key is
# present, its value must match like any other entry. All other pattern keys are required.
#
# @param tree [Hash] the data being matched
# @param exp [Hash] the expected pattern; values are handed to +element_match+
# @param bindings passed through unchanged to +element_match+
# @return [Boolean] true when every required (and every present optional) entry matches
def element_match_hash(tree, exp, bindings)
  # With optional keys the tree may have fewer entries than the pattern, but never more.
  return false if exp.size < tree.size

  exp.each do |expected_key, expected_value|
    optional = expected_key.to_s.match(/\A(.*)\?\z/)
    if optional
      # strip the '?' marker, preserving the key type (Symbol or String)
      expected_key = expected_key.is_a?(Symbol) ? optional[1].to_sym : optional[1]
      # An absent optional key only skips THIS entry. The remaining pattern entries must still be
      # checked (previously the whole match was declared successful at this point).
      next unless tree.key?(expected_key)
    end

    return false unless tree.key?(expected_key)

    # Recurse into the value and stop early on failure
    return false unless element_match(tree[expected_key], expected_value, bindings)
  end

  true
end
|
20
|
+
|
21
|
+
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'parslet'
|
4
|
+
|
5
|
+
require_relative 'basic_parser'
|
6
|
+
|
7
|
+
module Libis
|
8
|
+
module Tools
|
9
|
+
module Metadata
|
10
|
+
|
11
|
+
# noinspection RubyResolve
|
12
|
+
class SubfieldCriteriaParser < Libis::Tools::Metadata::BasicParser
|
13
|
+
|
14
|
+
root(:criteria)
|
15
|
+
|
16
|
+
rule(:criteria) { selection >> (spaces >> selection).repeat }
|
17
|
+
|
18
|
+
rule(:selection) { must >> must_not.maybe }
|
19
|
+
|
20
|
+
rule(:must) { names.as(:must).maybe >> (one_of | only_one_of).maybe }
|
21
|
+
rule(:must_not) { minus >> must.as(:not) }
|
22
|
+
|
23
|
+
rule(:one_of) { lrparen >> names.as(:one_of) >> rrparen }
|
24
|
+
rule(:only_one_of) { lcparen >> names.as(:only_one_of) >> rcparen }
|
25
|
+
|
26
|
+
rule(:names) { (character | number).repeat(1) }
|
27
|
+
|
28
|
+
# Render parsed criteria back into their textual form.
#
# @param criteria [Array, Hash, Object] a list of selections, a single selection hash, or anything else
# @return [String, Object] the selections rendered with +selection_to_s+ and joined by a space;
#   input that is neither an Array nor a Hash is returned untouched
def criteria_to_s(criteria)
  return criteria unless criteria.is_a?(Array) || criteria.is_a?(Hash)

  # a lone selection hash is handled as a one-element list
  entries = criteria.is_a?(Hash) ? [criteria] : criteria
  entries.map { |entry| selection_to_s(entry) }.join(' ')
end
|
39
|
+
|
40
|
+
# Render a single parsed selection as text.
#
# The pieces are emitted in this order: the :must codes, the :one_of codes in parentheses,
# the :only_one_of codes in curly braces and, after a hyphen, the rendered :not selection.
#
# @param selection [Hash, Object] a selection hash; anything else is returned untouched
# @return [String, Object] the textual form of the selection
def selection_to_s(selection)
  return selection unless selection.is_a? Hash

  pieces = [selection[:must].to_s]
  pieces << "(#{selection[:one_of]})" if selection[:one_of]
  pieces << "{#{selection[:only_one_of]}}" if selection[:only_one_of]
  pieces << "-#{selection_to_s(selection[:not])}" if selection[:not]
  pieces.join
end
|
48
|
+
|
49
|
+
# Check whether the given data satisfies at least one selection of the criteria.
#
# The criteria are parsed with this parser instance. (The previous code called +self.new+, which
# is not available on an instance and raised NoMethodError.)
#
# @param criteria [String] the subfield criteria text to parse
# @param data [Array] the codes to test; handed unchanged to +match_selection+
# @return [Boolean] true when the parse result is a String, or when any selection matches
def match_criteria(criteria, data)
  tree = parse(criteria)
  # a String parse result is treated as a match
  return true if tree.is_a? String
  # a lone selection is handled as a one-element list
  tree = [tree] unless tree.is_a? Array
  tree.any? { |selection| match_selection(selection, data) }
end
|
55
|
+
|
56
|
+
# Test a single parsed selection against a list of codes.
#
# A selection matches when all of the following hold:
# - every code in :must is present in +data+;
# - :one_of is empty, or at least one of its codes is present;
# - :only_one_of is empty, or exactly one of its codes is present
#   (the previous condition was inverted and rejected exactly this case);
# - there is no :not selection, or the :not selection itself does not match +data+.
#
# @param selection [Hash] parsed selection with optional :must, :one_of, :only_one_of and :not entries
# @param data [Array<String>] the codes that are present
# @return [Boolean] whether the selection matches
def match_selection(selection, data)
  must_match = selection[:must].to_s.split('')
  return false unless must_match == (must_match & data)

  one_of = selection[:one_of].to_s.split('')
  return false unless one_of.empty? || (one_of & data).any?

  only_one_of = selection[:only_one_of].to_s.split('')
  return false unless only_one_of.empty? || (only_one_of & data).size == 1

  excluded = selection[:not]
  return false if excluded && match_selection(excluded, data)

  true
end
|
66
|
+
|
67
|
+
end
|
68
|
+
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module Libis
  module Tools
    module Metadata

      # Lazy-load the parser classes of this namespace on first reference.
      autoload :BasicParser, 'libis/tools/metadata/parser/basic_parser'
      autoload :DublinCoreParser, 'libis/tools/metadata/parser/dublin_core_parser'
      autoload :Marc21Parser, 'libis/tools/metadata/parser/marc21_parser'
      autoload :SubfieldCriteriaParser, 'libis/tools/metadata/parser/subfield_criteria_parser'

      # The MARC format/select grammars and their shared rules live in the same parser directory
      # and namespace; register them too so they resolve without an explicit require.
      autoload :MarcFormatParser, 'libis/tools/metadata/parser/marc_format_parser'
      autoload :MarcSelectParser, 'libis/tools/metadata/parser/marc_select_parser'
      autoload :MarcRules, 'libis/tools/metadata/parser/marc_rules'

    end
  end
end
|
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
require 'libis/tools/assert'
|
4
4
|
|
5
|
+
require_relative 'parser/subfield_criteria_parser'
|
6
|
+
|
5
7
|
module Libis
|
6
8
|
module Tools
|
7
9
|
module Metadata
|
@@ -11,13 +13,17 @@ module Libis
|
|
11
13
|
attr_reader :tag
|
12
14
|
attr_reader :ind1
|
13
15
|
attr_reader :ind2
|
14
|
-
attr_reader :
|
16
|
+
attr_reader :subfield_data
|
15
17
|
|
16
|
-
def initialize(tag, ind1, ind2
|
18
|
+
def initialize(tag, ind1, ind2)
|
17
19
|
@tag = tag
|
18
|
-
@ind1 = ind1 || '
|
19
|
-
@ind2 = ind2 || '
|
20
|
-
@
|
20
|
+
@ind1 = ind1 || ''
|
21
|
+
@ind2 = ind2 || ''
|
22
|
+
@subfield_data = Hash.new { |h, k| h[k] = Array.new }
|
23
|
+
end
|
24
|
+
|
25
|
+
def add_subfield(name, value)
|
26
|
+
@subfield_data[name] << value
|
21
27
|
end
|
22
28
|
|
23
29
|
# dump the contents
|
@@ -25,7 +31,7 @@ module Libis
|
|
25
31
|
# @return [String] debug output to inspect the contents of the VarField
|
26
32
|
def dump
|
27
33
|
output = "#{@tag}:#{@ind1}:#{@ind2}:\n"
|
28
|
-
@
|
34
|
+
@subfield_data.each { |s, t| output += "\t#{s}:#{t}\n" }
|
29
35
|
output
|
30
36
|
end
|
31
37
|
|
@@ -34,7 +40,7 @@ module Libis
|
|
34
40
|
# @return [String] debug output to inspect the contents of the VarField - Single line version
|
35
41
|
def dump_line
|
36
42
|
output = "#{@tag}:#{@ind1}:#{@ind2}:"
|
37
|
-
@
|
43
|
+
@subfield_data.each { |s, t| output += "$#{s}#{t}" }
|
38
44
|
output
|
39
45
|
end
|
40
46
|
|
@@ -42,57 +48,57 @@ module Libis
|
|
42
48
|
#
|
43
49
|
# @return [Array] a list of all subfield codes
|
44
50
|
def keys
|
45
|
-
@
|
51
|
+
@subfield_data.keys
|
46
52
|
end
|
47
53
|
|
48
54
|
# get the first (or only) subfield value for the given code
|
49
55
|
#
|
50
56
|
# @return [String] the first or only entry of a subfield or nil if not present
|
51
57
|
# @param s [Character] the subfield code
|
52
|
-
def
|
53
|
-
|
58
|
+
def subfield(s)
|
59
|
+
subfield_array(s).first
|
54
60
|
end
|
55
61
|
|
56
62
|
# get a list of all subfield values for a given code
|
57
63
|
#
|
58
64
|
# @return [Array] all the entries of a repeatable subfield
|
59
65
|
# @param s [Character] the subfield code
|
60
|
-
def
|
66
|
+
def subfield_array(s)
|
61
67
|
assert(s.is_a?(String) && (s =~ /^[\da-z]$/) == 0, 'method expects a lower case alphanumerical char')
|
62
|
-
@
|
68
|
+
@subfield_data.has_key?(s) ? @subfield_data[s].dup : []
|
63
69
|
end
|
64
70
|
|
65
|
-
# get a list of the first subfield
|
71
|
+
# get a list of the first subfield values for all the codes in the given string
|
66
72
|
#
|
67
73
|
# @return [Array] list of the first or only entries of all subfield codes in the input string
|
68
|
-
# @param s [String] subfield code specification (see
|
74
|
+
# @param s [String] subfield code specification (see match)
|
69
75
|
#
|
70
|
-
# The subfield codes are cleaned
|
71
|
-
def
|
76
|
+
# The subfield codes are cleaned (see criteria_to_array)
|
77
|
+
def subfields(s)
|
72
78
|
assert(s.is_a?(String), 'method expects a string')
|
73
|
-
return [] unless (match_array =
|
74
|
-
|
79
|
+
return [] unless (match_array = match(s))
|
80
|
+
criteria_to_array(match_array.join(' ')).collect { |i| send(:subfield, i) }.flatten.compact
|
75
81
|
end
|
76
82
|
|
77
83
|
# get a list of all the subfield values for all the codes in the given string
|
78
84
|
#
|
79
85
|
# @return [Array] list of the all the entries of all subfield codes in the input string
|
80
|
-
# @param s [String] subfield code
|
86
|
+
# @param s [String] subfield code criteria (see match)
|
81
87
|
#
|
82
|
-
# The subfield codes are cleaned
|
88
|
+
# The subfield codes are cleaned (see criteria_to_array)
|
83
89
|
|
84
|
-
def
|
90
|
+
def subfields_array(s)
|
85
91
|
assert(s.is_a?(String), 'method expects a string')
|
86
|
-
return [] unless (match_array =
|
87
|
-
|
92
|
+
return [] unless (match_array = match(s))
|
93
|
+
criteria_to_array(match_array.join(' ')).collect { |i| send(:subfield_array, i) }.flatten.compact
|
88
94
|
end
|
89
95
|
|
90
|
-
# check if the current VarField matches the given
|
96
|
+
# check if the current VarField matches the given subfield criteria.
|
91
97
|
#
|
92
|
-
# @return [String] The matching part(s) of the
|
93
|
-
# @param
|
98
|
+
# @return [String] The matching part(s) of the criteria or nil if no match
|
99
|
+
# @param criteria [String] subfield criteria: sequence of alternative set of subfield codes that should-shouldn't be present
|
94
100
|
#
|
95
|
-
# The
|
101
|
+
# The subfield criteria consists of groups of characters. At least one of these groups should match for the test to succeed
|
96
102
|
# Within the group sets of codes may be divided by a hyphen (-). The first set of codes must all be present;
|
97
103
|
# the second set of codes must all <b>not</b> be present. Either set may be empty.
|
98
104
|
#
|
@@ -123,30 +129,34 @@ module Libis
|
|
123
129
|
# '$c...$d...' => nil
|
124
130
|
# '$b...$c...$d...' => nil
|
125
131
|
# '$a...$b...$c...$d...' => nil
|
126
|
-
def
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
132
|
+
def match(criteria)
|
133
|
+
begin
|
134
|
+
parser = Libis::Tools::Metadata::SubfieldCriteriaParser.new
|
135
|
+
tree = parser.parse(criteria)
|
136
|
+
return [] if tree.is_a? String
|
137
|
+
tree = [tree] unless tree.is_a? Array
|
138
|
+
result = tree.map do |selection|
|
139
|
+
next unless parser.match_selection(selection, keys)
|
140
|
+
parser.selection_to_s(selection)
|
141
|
+
end.compact
|
142
|
+
return nil if result.empty?
|
143
|
+
result
|
144
|
+
rescue Parslet::ParseFailed => failure
|
145
|
+
failure.cause.set_label(criteria)
|
146
|
+
raise failure
|
147
|
+
end
|
138
148
|
end
|
139
149
|
|
140
150
|
private
|
141
151
|
|
142
152
|
# @return [Array] cleaned up version of the input string
|
143
|
-
# @param
|
153
|
+
# @param subfields [String] subfield code specification
|
144
154
|
# cleans the subfield code specification and splits it into an array of characters
|
145
155
|
# Duplicates will be removed from the array and the order will be untouched.
|
146
|
-
def
|
156
|
+
def criteria_to_array(subfields)
|
147
157
|
|
148
158
|
# note that we remove the '-xxx' part as it is only required for matching
|
149
|
-
|
159
|
+
subfields.gsub(/ |-\w*/, '').split('').uniq
|
150
160
|
end
|
151
161
|
|
152
162
|
def sort_helper(x)
|
@@ -186,7 +196,7 @@ module Libis
|
|
186
196
|
# # equivalent to: t.fields_array('9ab')
|
187
197
|
#
|
188
198
|
# Note that it is not possible to use a fieldspec for the sequence of subfield codes. Spaces and '-' are not allowed
|
189
|
-
# in method calls. If you want this, use the #
|
199
|
+
# in method calls. If you want this, use the #subfield(s) and #subfield(s)_array methods.
|
190
200
|
#
|
191
201
|
def method_missing(name, *args)
|
192
202
|
operation, subfields = name.to_s.split('_')
|
@@ -197,15 +207,15 @@ module Libis
|
|
197
207
|
case operation
|
198
208
|
when 'f'
|
199
209
|
if subfields.size > 1
|
200
|
-
operation = :
|
210
|
+
operation = :subfields
|
201
211
|
else
|
202
|
-
operation = :
|
212
|
+
operation = :subfield
|
203
213
|
end
|
204
214
|
when 'a'
|
205
215
|
if subfields.size > 1
|
206
|
-
operation = :
|
216
|
+
operation = :subfields_array
|
207
217
|
else
|
208
|
-
operation = :
|
218
|
+
operation = :subfield_array
|
209
219
|
end
|
210
220
|
else
|
211
221
|
throw "Unknown method invocation: '#{name}' with: #{args}"
|
@@ -73,6 +73,10 @@ module Libis
|
|
73
73
|
'float'
|
74
74
|
when DateTime, Date, Time
|
75
75
|
'datetime'
|
76
|
+
when Array
|
77
|
+
'array'
|
78
|
+
when Hash
|
79
|
+
'hash'
|
76
80
|
else
|
77
81
|
send(:default).class.name.downcase
|
78
82
|
end
|
@@ -95,9 +99,17 @@ module Libis
|
|
95
99
|
when 'datetime'
|
96
100
|
return v.to_datetime if v.respond_to? :to_datetime
|
97
101
|
return DateTime.parse(v)
|
102
|
+
when 'array'
|
103
|
+
return v if v.is_a?(Array)
|
104
|
+
return v.split(/[,;|\s]+/) if v.is_a?(String)
|
105
|
+
return v.to_a if v.respond_to?(:to_a)
|
106
|
+
when 'hash'
|
107
|
+
return v when v.is_a?(Hash)
|
108
|
+
return Hash[(0...v.size).zip(v)] when v.is_a?(Array)
|
98
109
|
else
|
99
110
|
raise RuntimeError, "Datatype not supported: '#{dtype}'"
|
100
111
|
end
|
112
|
+
nil
|
101
113
|
end
|
102
114
|
|
103
115
|
def check_constraint(v, constraint = nil)
|
@@ -145,8 +157,6 @@ module Libis
|
|
145
157
|
end
|
146
158
|
end
|
147
159
|
|
148
|
-
protected
|
149
|
-
|
150
160
|
def parameters
|
151
161
|
@parameters ||= Hash.new
|
152
162
|
end
|