libis-tools 0.9.9 → 0.9.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +4 -2
  3. data/README.md +19 -0
  4. data/lib/libis/tools.rb +1 -0
  5. data/lib/libis/tools/config_file.rb +1 -1
  6. data/lib/libis/tools/extend/kernel.rb +16 -0
  7. data/lib/libis/tools/metadata.rb +20 -0
  8. data/lib/libis/tools/metadata/dublin_core_record.rb +1 -1
  9. data/lib/libis/tools/metadata/{field_spec.rb → field_format.rb} +7 -7
  10. data/lib/libis/tools/metadata/fix_field.rb +6 -1
  11. data/lib/libis/tools/metadata/mapper.rb +80 -0
  12. data/lib/libis/tools/metadata/mappers/flandrica.rb +69 -0
  13. data/lib/libis/tools/metadata/mappers/kuleuven.rb +1702 -0
  14. data/lib/libis/tools/metadata/marc21_record.rb +5 -4
  15. data/lib/libis/tools/metadata/marc_record.rb +96 -37
  16. data/lib/libis/tools/metadata/parser/basic_parser.rb +118 -0
  17. data/lib/libis/tools/metadata/parser/dublin_core_parser.rb +36 -0
  18. data/lib/libis/tools/metadata/parser/marc21_parser.rb +206 -0
  19. data/lib/libis/tools/metadata/parser/marc_format_parser.rb +52 -0
  20. data/lib/libis/tools/metadata/parser/marc_rules.rb +35 -0
  21. data/lib/libis/tools/metadata/parser/marc_select_parser.rb +25 -0
  22. data/lib/libis/tools/metadata/parser/patch.rb +21 -0
  23. data/lib/libis/tools/metadata/parser/subfield_criteria_parser.rb +71 -0
  24. data/lib/libis/tools/metadata/parsers.rb +12 -0
  25. data/lib/libis/tools/metadata/var_field.rb +57 -47
  26. data/lib/libis/tools/parameter.rb +12 -2
  27. data/lib/libis/tools/version.rb +1 -1
  28. data/libis-tools.gemspec +4 -3
  29. data/spec/config_spec.rb +3 -1
  30. data/spec/data/MetadataMapping.xlsx +0 -0
  31. data/spec/metadata/8389207.marc +96 -0
  32. data/spec/metadata/dublin_core_parser_spec.rb +48 -0
  33. data/spec/metadata/marc21_parser_data.rb +382 -0
  34. data/spec/metadata/marc21_parser_spec.rb +67 -0
  35. data/spec/metadata/marc21_spec.rb +116 -0
  36. data/spec/metadata/metadata_mapper_spec.rb +23 -0
  37. data/spec/spec_helper.rb +13 -0
  38. data/test.rb +61 -0
  39. metadata +77 -7
  40. data/lib/libis/tools/dc_record.rb +0 -47
@@ -0,0 +1,52 @@
1
+ # encoding: utf-8
2
+
3
+ require 'parslet'
4
+
5
+ require_relative 'basic_parser'
6
+ require_relative 'marc_rules'
7
+
8
+ module Libis
9
+ module Tools
10
+ module Metadata
11
+
12
+ # noinspection RubyResolve
13
# Parslet grammar for MARC format mapping strings.
#
# The root rule is +mapping+: one or more entries (captured as :entry)
# followed by an optional postfix (captured as :postfix). An entry is
# either a group or a subfield reference.
#
# Atoms such as +text+, +grouped+, +star+, +dquote+, +squote+, +integer+,
# +minus+, +paren+, +lsparen+ and +rsparen+ are not defined in this class;
# they are expected to come from BasicParser. +dollar+, +sf_indicator+ and
# +sf_name+ come from BasicParser / MarcRules.
class MarcFormatParser < Libis::Tools::Metadata::BasicParser
  include Libis::Tools::Metadata::MarcRules

  root(:mapping)

  rule(:mapping) { entry.repeat(1).as(:entry) >> postfix?.as(:postfix) }

  # an entry is a (possibly prefixed) nested group or a single subfield reference
  rule(:entry) { group.as(:group) | sf_reference }
  rule(:group) { prefix?.as(:prefix) >> grouped(mapping) }

  # pre- and postfix
  # Each rule is defined exactly once: Parslet's +rule+ replaces any earlier
  # definition with the same name, so a second definition would silently win.
  rule(:prefix) { text }
  rule(:prefix?) { prefix.maybe }
  rule(:postfix) { text }
  rule(:postfix?) { postfix.maybe }

  # subfield reference: a variable subfield (:subfield) or a fixed field slice (:fixfield)
  rule(:sf_reference) { sf_variable.as(:subfield) | sf_fixed.as(:fixfield) }

  # variable subfield: optional prefix, subfield indicator, optional repeat marker, subfield name
  rule(:sf_variable) { prefix?.as(:prefix) >> sf_indicator >> sf_repeat?.as(:repeat) >> sf_name }

  # repeat marker: a star, optionally followed by a double- or single-quoted
  # separator (captured as :separator)
  rule(:sf_repeat) do
    star >>
      (dquote >> not_dquote.repeat.as(:separator) >> dquote |
       squote >> not_squote.repeat.as(:separator) >> squote
      ).maybe
  end
  rule(:sf_repeat?) { sf_repeat.maybe }

  # fixed field: optional prefix, subfield indicator, '@', then a single
  # position, a first-last range, or a star meaning everything
  rule(:sf_fixed) { prefix?.as(:prefix) >> sf_indicator >> str('@') >> (sf_position | sf_range | sf_star) }
  rule(:sf_position) { lsparen >> integer.as(:position) >> rsparen }
  rule(:sf_range) { lsparen >> integer.as(:first) >> minus >> integer.as(:last) >> rsparen }
  rule(:sf_star) { star.as(:all) }

  # any character that is neither a paren nor a dollar, or a literal '$$'
  rule(:other) { paren.absent? >> dollar.absent? >> any | str('$$') }
end
49
+
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,35 @@
1
+ # encoding: utf-8
2
+
3
+ require 'parslet'
4
+
5
+ module Libis
6
+ module Tools
7
+ module Metadata
8
+
9
+ # noinspection RubyResolve
10
+ module MarcRules # shared Parslet rules for MARC tags, indicators and subfield references
11
+ include Parslet
12
+ # NOTE(review): number, character, hashtag, underscore and dollar are not defined in this module; presumably the including parser (BasicParser subclasses) supplies them - confirm
13
+ # tag: exactly three `number` atoms or exactly three `character` atoms
14
+ rule(:tag) { tag_numeric | tag_alpha }
15
+ rule(:tag_numeric) { number.repeat(3, 3) }
16
+ rule(:tag_alpha) { character.repeat(3, 3) }
17
+
18
+ # indicator: a single hashtag, underscore, number or character atom
19
+ rule(:indicator) { hashtag | underscore | number | character }
20
+ rule(:indicator?) { indicator.maybe }
21
+ rule(:indicators) { indicator?.as(:ind1) >> indicator?.as(:ind2) } # two optional indicators, captured as :ind1 and :ind2
22
+
23
+ # subfield: the subfield indicator (the `dollar` atom) followed by a one-atom name
24
+ rule(:sf_indicator) { dollar }
25
+ rule(:sf_name) { (character | number).as(:name) } # single character or number, captured as :name
26
+ rule(:sf_name?) { sf_name.maybe }
27
+ rule(:sf_names) { (character | number).repeat(1).as(:names) } # one or more characters/numbers, captured as :names
28
+ rule(:sf_names?) { sf_names.maybe }
29
+ rule(:subfield) { sf_indicator >> sf_name }
30
+
31
+ end
32
+
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+
3
+ require 'parslet'
4
+
5
+ require_relative 'basic_parser'
6
+ require_relative 'marc_rules'
7
+
8
+ module Libis
9
+ module Tools
10
+ module Metadata
11
+
12
+ # noinspection RubyResolve
13
+ class MarcSelectParser < Libis::Tools::Metadata::BasicParser # Parslet grammar for a MARC selection expression
14
+ include Libis::Tools::Metadata::MarcRules
15
+ root(:MARC)
16
+ rule(:MARC) { str('MARC') >> spaces? >> tag.as(:tag) >> spaces? >> indicators >> spaces? >> subfield.maybe.as(:subfield) } # literal 'MARC', then tag (:tag), indicators and an optional subfield (:subfield), each optionally separated by `spaces?`
17
+ # NOTE(review): `spaces?` is not defined here or in MarcRules; presumably provided by BasicParser - confirm
18
+ # subfield: the two rules below are commented out and therefore inactive
19
+ # rule(:sf_condition) { sf_indicator >> sf_names >> (space >> sf_names).repeat }
20
+ # rule(:sf_names) { sf_name.repeat(1) }
21
+ end
22
+
23
+ end
24
+ end
25
+ end
@@ -0,0 +1,21 @@
1
+ class Parslet::Pattern
2
+
3
# Match a hash +tree+ against a hash pattern +exp+, allowing optional keys.
#
# A pattern key whose name ends in '?' is optional: when the tree has no
# entry for the key (without the trailing '?') that key is skipped and
# matching continues with the remaining pattern keys. Every other pattern
# key must be present in the tree and its value must match recursively.
#
# @param tree [Hash] the parse tree node being matched
# @param exp [Hash] the pattern; keys may carry a trailing '?' to mark them optional
# @param bindings [Hash] binding store passed through to +element_match+
# @return [Boolean] true when all mandatory (and all present optional) keys match
def element_match_hash(tree, exp, bindings)
  # A tree with more entries than the pattern has keys can never be covered.
  return false if exp.size < tree.size

  exp.each do |expected_key, expected_value|
    key_name = expected_key.to_s
    if key_name.end_with?('?')
      bare_name = key_name.chomp('?')
      expected_key = expected_key.is_a?(Symbol) ? bare_name.to_sym : bare_name
      # An absent optional key is skipped; the remaining keys must still be checked.
      next unless tree.key?(expected_key)
    end

    return false unless tree.key?(expected_key)

    # Recurse into the value and stop early on failure
    return false unless element_match(tree[expected_key], expected_value, bindings)
  end

  true
end
20
+
21
+ end
@@ -0,0 +1,71 @@
1
+ # encoding: utf-8
2
+
3
+ require 'parslet'
4
+
5
+ require_relative 'basic_parser'
6
+
7
+ module Libis
8
+ module Tools
9
+ module Metadata
10
+
11
+ # noinspection RubyResolve
12
+ class SubfieldCriteriaParser < Libis::Tools::Metadata::BasicParser
13
+
14
+ root(:criteria)
15
+ # criteria: one selection followed by any number of `spaces`-separated selections
16
+ rule(:criteria) { selection >> (spaces >> selection).repeat }
17
+ # selection: a `must` part optionally followed by a `minus`-prefixed exclusion
18
+ rule(:selection) { must >> must_not.maybe }
19
+ # must: optional names (captured as :must) and an optional one_of / only_one_of group
20
+ rule(:must) { names.as(:must).maybe >> (one_of | only_one_of).maybe }
21
+ rule(:must_not) { minus >> must.as(:not) } # exclusion part, captured as :not
22
+ # groups: delimiters come from lrparen/rrparen and lcparen/rcparen (defined elsewhere); selection_to_s renders them as (...) and {...}
23
+ rule(:one_of) { lrparen >> names.as(:one_of) >> rrparen }
24
+ rule(:only_one_of) { lcparen >> names.as(:only_one_of) >> rcparen }
25
+ # names: one or more `character` / `number` atoms
26
+ rule(:names) { (character | number).repeat(1) }
27
+
28
# Render parsed criteria back into their textual form.
#
# @param criteria [Array, Hash, Object] a list of selections, a single
#   selection hash, or any other value
# @return [String, Object] the selections rendered with +selection_to_s+ and
#   joined by a single space; any value that is neither an Array nor a Hash
#   is handed back unchanged
def criteria_to_s(criteria)
  selections =
    if criteria.is_a?(Array)
      criteria
    elsif criteria.is_a?(Hash)
      [criteria]
    end

  # neither a list nor a single selection: nothing to render
  return criteria if selections.nil?

  selections.map { |item| selection_to_s(item) }.join(' ')
end
39
+
40
# Render one parsed selection as text.
#
# The :must part comes first, then "(...)" for :one_of, "{...}" for
# :only_one_of and "-..." for the (recursively rendered) :not part; parts
# that are missing from the hash are left out.
#
# @param selection [Hash, Object] a selection hash, or any other value
# @return [String, Object] the rendered text; a non-Hash value is returned as is
def selection_to_s(selection)
  return selection unless selection.is_a?(Hash)

  pieces = [selection[:must].to_s]
  pieces << "(#{selection[:one_of]})" if selection[:one_of]
  pieces << "{#{selection[:only_one_of]}}" if selection[:only_one_of]
  pieces << "-#{selection_to_s(selection[:not])}" if selection[:not]
  pieces.join
end
48
+
49
# Check whether the given subfield codes satisfy a criteria string.
#
# The criteria string is parsed with this parser instance; the data matches
# when at least one of the parsed selections matches (see +match_selection+).
#
# @param criteria [String] the textual subfield criteria
# @param data [Array<String>] the subfield codes that are present
# @return [Boolean] true when the parse result is a plain String (nothing to
#   check) or when any selection matches
def match_criteria(criteria, data)
  # This is an instance method, so parse with the receiver itself.
  tree = parse(criteria)
  return true if tree.is_a? String
  tree = [tree] unless tree.is_a? Array
  tree.any? { |selection| match_selection(selection, data) }
end
55
+
56
# Check a single parsed selection against the subfield codes that are present.
#
# @param selection [Hash] parsed selection with optional keys :must, :one_of,
#   :only_one_of and :not (the latter holding a nested selection)
# @param data [Array<String>] the subfield codes that are present
# @return [Boolean] true when
#   - every code in :must is present,
#   - at least one code of :one_of is present (if given),
#   - exactly one code of :only_one_of is present (if given), and
#   - the nested :not selection (if given) does not match.
def match_selection(selection, data)
  must_match = selection[:must].to_s.split('')
  return false unless must_match == (must_match & data)

  one_of = selection[:one_of].to_s.split('')
  return false unless one_of.empty? || (one_of & data).any?

  # "only one of" means exactly one of the listed codes may be present
  only_one_of = selection[:only_one_of].to_s.split('')
  return false unless only_one_of.empty? || (only_one_of & data).size == 1

  # NOTE(review): the exclusion only rejects when the nested selection matches
  # as a whole, i.e. when all of its :must codes are present - confirm that
  # this is the intended meaning of the '-' part.
  return false if selection[:not] && match_selection(selection[:not], data)

  true
end
66
+
67
+ end
68
+
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,12 @@
1
+ module Libis
2
+ module Tools
3
+ module Metadata
4
+ # register the parser classes for autoloading: each file is required the first time its constant is referenced
5
+ autoload :BasicParser, 'libis/tools/metadata/parser/basic_parser'
6
+ autoload :DublinCoreParser, 'libis/tools/metadata/parser/dublin_core_parser'
7
+ autoload :Marc21Parser, 'libis/tools/metadata/parser/marc21_parser'
8
+ autoload :SubfieldCriteriaParser, 'libis/tools/metadata/parser/subfield_criteria_parser'
9
+
10
+ end
11
+ end
12
+ end
@@ -2,6 +2,8 @@
2
2
 
3
3
  require 'libis/tools/assert'
4
4
 
5
+ require_relative 'parser/subfield_criteria_parser'
6
+
5
7
  module Libis
6
8
  module Tools
7
9
  module Metadata
@@ -11,13 +13,17 @@ module Libis
11
13
  attr_reader :tag
12
14
  attr_reader :ind1
13
15
  attr_reader :ind2
14
- attr_reader :subfield
16
+ attr_reader :subfield_data
15
17
 
16
- def initialize(tag, ind1, ind2, subfield = {})
18
+ def initialize(tag, ind1, ind2)
17
19
  @tag = tag
18
- @ind1 = ind1 || ' '
19
- @ind2 = ind2 || ' '
20
- @subfield = subfield || {}
20
+ @ind1 = ind1 || ''
21
+ @ind2 = ind2 || ''
22
+ @subfield_data = Hash.new { |h, k| h[k] = Array.new }
23
+ end
24
+
25
+ def add_subfield(name, value)
26
+ @subfield_data[name] << value
21
27
  end
22
28
 
23
29
  # dump the contents
@@ -25,7 +31,7 @@ module Libis
25
31
  # @return [String] debug output to inspect the contents of the VarField
26
32
  def dump
27
33
  output = "#{@tag}:#{@ind1}:#{@ind2}:\n"
28
- @subfield.each { |s, t| output += "\t#{s}:#{t}\n" }
34
+ @subfield_data.each { |s, t| output += "\t#{s}:#{t}\n" }
29
35
  output
30
36
  end
31
37
 
@@ -34,7 +40,7 @@ module Libis
34
40
  # @return [String] debug output to inspect the contents of the VarField - Single line version
35
41
  def dump_line
36
42
  output = "#{@tag}:#{@ind1}:#{@ind2}:"
37
- @subfield.each { |s, t| output += "$#{s}#{t}" }
43
+ @subfield_data.each { |s, t| output += "$#{s}#{t}" }
38
44
  output
39
45
  end
40
46
 
@@ -42,57 +48,57 @@ module Libis
42
48
  #
43
49
  # @return [Array] a list of all subfield codes
44
50
  def keys
45
- @subfield.keys
51
+ @subfield_data.keys
46
52
  end
47
53
 
48
54
  # get the first (or only) subfield value for the given code
49
55
  #
50
56
  # @return [String] the first or only entry of a subfield or nil if not present
51
57
  # @param s [Character] the subfield code
52
- def field(s)
53
- field_array(s).first
58
+ def subfield(s)
59
+ subfield_array(s).first
54
60
  end
55
61
 
56
62
  # get a list of all subfield values for a given code
57
63
  #
58
64
  # @return [Array] all the entries of a repeatable subfield
59
65
  # @param s [Character] the subfield code
60
- def field_array(s)
66
+ def subfield_array(s)
61
67
  assert(s.is_a?(String) && (s =~ /^[\da-z]$/) == 0, 'method expects a lower case alphanumerical char')
62
- @subfield.has_key?(s) ? @subfield[s].dup : []
68
+ @subfield_data.has_key?(s) ? @subfield_data[s].dup : []
63
69
  end
64
70
 
65
- # get a list of the first subfield value for all the codes in the given string
71
+ # get a list of the first subfield values for all the codes in the given string
66
72
  #
67
73
  # @return [Array] list of the first or only entries of all subfield codes in the input string
68
- # @param s [String] subfield code specification (see match_fieldspec?)
74
+ # @param s [String] subfield code specification (see match)
69
75
  #
70
- # The subfield codes are cleaned and sorted first (see fieldspec_to_sorted_array)
71
- def fields(s)
76
+ # The subfield codes are cleaned (see criteria_to_array)
77
+ def subfields(s)
72
78
  assert(s.is_a?(String), 'method expects a string')
73
- return [] unless (match_array = match_fieldspec?(s))
74
- fieldspec_to_array(match_array.join(' ')).collect { |i| send(:field, i) }.flatten.compact
79
+ return [] unless (match_array = match(s))
80
+ criteria_to_array(match_array.join(' ')).collect { |i| send(:subfield, i) }.flatten.compact
75
81
  end
76
82
 
77
83
  # get a list of all the subfield values for all the codes in the given string
78
84
  #
79
85
  # @return [Array] list of the all the entries of all subfield codes in the input string
80
- # @param s [String] subfield code specification (see match_fieldspec?)
86
+ # @param s [String] subfield code criteria (see match)
81
87
  #
82
- # The subfield codes are cleaned and sorted first (see fieldspec_to_sorted_array)
88
+ # The subfield codes are cleaned (see criteria_to_array)
83
89
 
84
- def fields_array(s)
90
+ def subfields_array(s)
85
91
  assert(s.is_a?(String), 'method expects a string')
86
- return [] unless (match_array = match_fieldspec?(s))
87
- fieldspec_to_array(match_array.join(' ')).collect { |i| send(:field_array, i) }.flatten.compact
92
+ return [] unless (match_array = match(s))
93
+ criteria_to_array(match_array.join(' ')).collect { |i| send(:subfield_array, i) }.flatten.compact
88
94
  end
89
95
 
90
- # check if the current VarField matches the given field specification.
96
+ # check if the current VarField matches the given subfield criteria.
91
97
  #
92
- # @return [String] The matching part(s) of the specification or nil if no match
93
- # @param fieldspec [String] field specification: sequence of alternative set of subfield codes that should-shouldn't be present
98
+ # @return [String] The matching part(s) of the criteria or nil if no match
99
+ # @param criteria [String] subfield criteria: sequence of alternative set of subfield codes that should-shouldn't be present
94
100
  #
95
- # The fieldspec consists of groups of characters. At least one of these groups should match for the test to succeed
101
+ # The subfield criteria consists of groups of characters. At least one of these groups should match for the test to succeed
96
102
  # Within the group sets of codes may be divided by a hyphen (-). The first set of codes must all be present;
97
103
  # the second set of codes must all <b>not</b> be present. Either set may be empty.
98
104
  #
@@ -123,30 +129,34 @@ module Libis
123
129
  # '$c...$d...' => nil
124
130
  # '$b...$c...$d...' => nil
125
131
  # '$a...$b...$c...$d...' => nil
126
- def match_fieldspec?(fieldspec)
127
- return [] if fieldspec.empty?
128
- result = fieldspec.split.collect { |fs|
129
- fa = fs.split '-'
130
- assert(fa.size <= 2, 'more than one "-" is not allowed in a fieldspec')
131
- must_match = (fa[0] || '').split ''
132
- must_not_match = (fa[1] || '').split ''
133
- next unless (must_match == (must_match & keys)) && (must_not_match & keys).empty?
134
- fs
135
- }.compact
136
- return nil if result.empty?
137
- result
132
# Test this field's subfield codes against the given subfield criteria.
#
# @param criteria [String] the subfield criteria to parse and test
# @return [Array<String>, nil] the textual form of every selection that
#   matches the codes returned by +keys+; an empty array when the parse
#   result is a plain String; nil when no selection matches
# @raise [Parslet::ParseFailed] when the criteria cannot be parsed; the
#   failure cause is relabelled with the offending criteria before re-raising
def match(criteria)
  parser = Libis::Tools::Metadata::SubfieldCriteriaParser.new
  tree = parser.parse(criteria)
  return [] if tree.is_a? String

  selections = tree.is_a?(Array) ? tree : [tree]
  hits = selections.each_with_object([]) do |selection, found|
    next unless parser.match_selection(selection, keys)
    text = parser.selection_to_s(selection)
    found << text unless text.nil?
  end
  hits.empty? ? nil : hits
rescue Parslet::ParseFailed => failure
  failure.cause.set_label(criteria)
  raise failure
end
139
149
 
140
150
  private
141
151
 
142
152
  # @return [Array] cleaned up version of the input string
143
- # @param fieldspec [String] subfield code specification
153
+ # @param subfields [String] subfield code specification
144
154
  # cleans the subfield code specification and splits it into an array of characters
145
155
  # Duplicates will be removed from the array and the order will be untouched.
146
- def fieldspec_to_array(fieldspec)
156
# Reduce a subfield criteria string to the list of subfield codes it selects.
#
# @param subfields [String] subfield criteria text, e.g. the matching
#   selections joined with spaces
# @return [Array<String>] the individual codes, duplicates removed, in order
#   of first appearance
def criteria_to_array(subfields)
  # note that we remove the '-xxx' part as it is only required for matching;
  # an exclusion may carry its own (...) or {...} group, which goes with it
  without_exclusions = subfields.gsub(/-\w*(?:\(\w*\)|\{\w*\})?/, '')
  # the grouping characters and spaces are syntax, not subfield codes
  without_exclusions.delete(' (){}').split('').uniq
end
151
161
 
152
162
  def sort_helper(x)
@@ -186,7 +196,7 @@ module Libis
186
196
  # # equivalent to: t.fields_array('9ab')
187
197
  #
188
198
  # Note that it is not possible to use a fieldspec for the sequence of subfield codes. Spaces and '-' are not allowed
189
- # in method calls. If you want this, use the #field(s) and #field(s)_array methods.
199
+ # in method calls. If you want this, use the #subfield(s) and #subfield(s)_array methods.
190
200
  #
191
201
  def method_missing(name, *args)
192
202
  operation, subfields = name.to_s.split('_')
@@ -197,15 +207,15 @@ module Libis
197
207
  case operation
198
208
  when 'f'
199
209
  if subfields.size > 1
200
- operation = :fields
210
+ operation = :subfields
201
211
  else
202
- operation = :field
212
+ operation = :subfield
203
213
  end
204
214
  when 'a'
205
215
  if subfields.size > 1
206
- operation = :fields_array
216
+ operation = :subfields_array
207
217
  else
208
- operation = :field_array
218
+ operation = :subfield_array
209
219
  end
210
220
  else
211
221
  throw "Unknown method invocation: '#{name}' with: #{args}"
@@ -73,6 +73,10 @@ module Libis
73
73
  'float'
74
74
  when DateTime, Date, Time
75
75
  'datetime'
76
+ when Array
77
+ 'array'
78
+ when Hash
79
+ 'hash'
76
80
  else
77
81
  send(:default).class.name.downcase
78
82
  end
@@ -95,9 +99,17 @@ module Libis
95
99
  when 'datetime'
96
100
  return v.to_datetime if v.respond_to? :to_datetime
97
101
  return DateTime.parse(v)
102
+ when 'array'
103
+ return v if v.is_a?(Array)
104
+ return v.split(/[,;|\s]+/) if v.is_a?(String)
105
+ return v.to_a if v.respond_to?(:to_a)
106
+ when 'hash'
107
+ return v when v.is_a?(Hash)
108
+ return Hash[(0...v.size).zip(v)] when v.is_a?(Array)
98
109
  else
99
110
  raise RuntimeError, "Datatype not supported: '#{dtype}'"
100
111
  end
112
+ nil
101
113
  end
102
114
 
103
115
  def check_constraint(v, constraint = nil)
@@ -145,8 +157,6 @@ module Libis
145
157
  end
146
158
  end
147
159
 
148
- protected
149
-
150
160
  def parameters
151
161
  @parameters ||= Hash.new
152
162
  end