libis-tools 0.9.9 → 0.9.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +4 -2
  3. data/README.md +19 -0
  4. data/lib/libis/tools.rb +1 -0
  5. data/lib/libis/tools/config_file.rb +1 -1
  6. data/lib/libis/tools/extend/kernel.rb +16 -0
  7. data/lib/libis/tools/metadata.rb +20 -0
  8. data/lib/libis/tools/metadata/dublin_core_record.rb +1 -1
  9. data/lib/libis/tools/metadata/{field_spec.rb → field_format.rb} +7 -7
  10. data/lib/libis/tools/metadata/fix_field.rb +6 -1
  11. data/lib/libis/tools/metadata/mapper.rb +80 -0
  12. data/lib/libis/tools/metadata/mappers/flandrica.rb +69 -0
  13. data/lib/libis/tools/metadata/mappers/kuleuven.rb +1702 -0
  14. data/lib/libis/tools/metadata/marc21_record.rb +5 -4
  15. data/lib/libis/tools/metadata/marc_record.rb +96 -37
  16. data/lib/libis/tools/metadata/parser/basic_parser.rb +118 -0
  17. data/lib/libis/tools/metadata/parser/dublin_core_parser.rb +36 -0
  18. data/lib/libis/tools/metadata/parser/marc21_parser.rb +206 -0
  19. data/lib/libis/tools/metadata/parser/marc_format_parser.rb +52 -0
  20. data/lib/libis/tools/metadata/parser/marc_rules.rb +35 -0
  21. data/lib/libis/tools/metadata/parser/marc_select_parser.rb +25 -0
  22. data/lib/libis/tools/metadata/parser/patch.rb +21 -0
  23. data/lib/libis/tools/metadata/parser/subfield_criteria_parser.rb +71 -0
  24. data/lib/libis/tools/metadata/parsers.rb +12 -0
  25. data/lib/libis/tools/metadata/var_field.rb +57 -47
  26. data/lib/libis/tools/parameter.rb +12 -2
  27. data/lib/libis/tools/version.rb +1 -1
  28. data/libis-tools.gemspec +4 -3
  29. data/spec/config_spec.rb +3 -1
  30. data/spec/data/MetadataMapping.xlsx +0 -0
  31. data/spec/metadata/8389207.marc +96 -0
  32. data/spec/metadata/dublin_core_parser_spec.rb +48 -0
  33. data/spec/metadata/marc21_parser_data.rb +382 -0
  34. data/spec/metadata/marc21_parser_spec.rb +67 -0
  35. data/spec/metadata/marc21_spec.rb +116 -0
  36. data/spec/metadata/metadata_mapper_spec.rb +23 -0
  37. data/spec/spec_helper.rb +13 -0
  38. data/test.rb +61 -0
  39. metadata +77 -7
  40. data/lib/libis/tools/dc_record.rb +0 -47
@@ -0,0 +1,52 @@
1
+ # encoding: utf-8
2
+
3
+ require 'parslet'
4
+
5
+ require_relative 'basic_parser'
6
+ require_relative 'marc_rules'
7
+
8
module Libis
  module Tools
    module Metadata

      # Parslet grammar for MARC format (mapping) expressions.
      #
      # A mapping is one or more entries followed by an optional postfix. An entry is either a
      # group (an optional prefix followed by a nested mapping wrapped by +grouped+) or a
      # subfield reference. Subfield references come in two flavours:
      #
      # * variable subfield: optional prefix, the subfield indicator, an optional repeat marker
      #   (a star, optionally followed by a quoted separator) and the subfield name;
      # * fixed field: optional prefix, the subfield indicator, '@' and either a position, a
      #   range ('first-last') or a star meaning the whole field.
      #
      # Helper rules that are not defined here or in MarcRules (text, grouped, star, dquote,
      # squote, not_dquote, not_squote, lsparen, rsparen, integer, minus, paren, any, ...) are
      # presumably provided by BasicParser -- verify there.
      #
      # noinspection RubyResolve
      class MarcFormatParser < Libis::Tools::Metadata::BasicParser
        include Libis::Tools::Metadata::MarcRules

        root(:mapping)

        rule(:mapping) { entry.repeat(1).as(:entry) >> postfix?.as(:postfix) }

        # a group is tried first; anything else must be a subfield reference
        rule(:entry) { group.as(:group) | sf_reference }
        rule(:group) { prefix?.as(:prefix) >> grouped(mapping) }

        # pre- and postfix
        #
        # Both rules used to be declared twice in a row: first as +other.repeat(1)+ and then as
        # +text+. The later declaration replaced the earlier one, so only the +text+ variant was
        # ever in effect; the shadowed declarations have been dropped.
        rule(:prefix) { text }
        rule(:prefix?) { prefix.maybe }
        rule(:postfix) { text }
        rule(:postfix?) { postfix.maybe }

        # subfield reference
        rule(:sf_reference) { sf_variable.as(:subfield) | sf_fixed.as(:fixfield) }

        # variable subfield: [prefix] $ [*["separator"]] name
        rule(:sf_variable) { prefix?.as(:prefix) >> sf_indicator >> sf_repeat?.as(:repeat) >> sf_name }
        # repeat marker: a star, optionally followed by a separator in double or single quotes
        rule(:sf_repeat) {
          star >> (
            dquote >> not_dquote.repeat.as(:separator) >> dquote |
            squote >> not_squote.repeat.as(:separator) >> squote
          ).maybe
        }
        rule(:sf_repeat?) { sf_repeat.maybe }

        # fixed field: [prefix] $ @ followed by a position, a range or a star
        rule(:sf_fixed) { prefix?.as(:prefix) >> sf_indicator >> str('@') >> (sf_position | sf_range | sf_star) }
        rule(:sf_position) { lsparen >> integer.as(:position) >> rsparen }
        rule(:sf_range) { lsparen >> integer.as(:first) >> minus >> integer.as(:last) >> rsparen }
        rule(:sf_star) { star.as(:all) }

        # Any single character that is neither a parenthesis nor a dollar sign, or an escaped
        # dollar ('$$'). No rule in this class references it any more; it is kept so that code
        # calling #other on the parser keeps working.
        rule(:other) { paren.absent? >> dollar.absent? >> any | str('$$') }
      end

    end
  end
end
@@ -0,0 +1,35 @@
1
+ # encoding: utf-8
2
+
3
+ require 'parslet'
4
+
5
module Libis
  module Tools
    module Metadata

      # Grammar fragments shared by the MARC parsers.
      #
      # Including Parslet makes the +rule+ macro available, so every rule below becomes a method
      # of the parser class that mixes this module in. The atoms referenced here (number,
      # character, hashtag, underscore, dollar) are not defined in this module; the including
      # parser has to supply them (presumably through BasicParser -- verify there).
      #
      # noinspection RubyResolve
      module MarcRules
        include Parslet

        # --- tag: exactly three digits or exactly three characters ---
        rule(:tag_numeric) { number.repeat(3, 3) }
        rule(:tag_alpha) { character.repeat(3, 3) }
        rule(:tag) { tag_numeric | tag_alpha }

        # --- indicators: two optional positions, captured as :ind1 and :ind2 ---
        rule(:indicator) { hashtag | underscore | number | character }
        rule(:indicator?) { indicator.maybe }
        rule(:indicators) { indicator?.as(:ind1) >> indicator?.as(:ind2) }

        # --- subfields ---
        # marker that introduces a subfield code
        rule(:sf_indicator) { dollar }
        # a single subfield code, captured as :name
        rule(:sf_name) { (character | number).as(:name) }
        rule(:sf_name?) { sf_name.maybe }
        # one or more subfield codes, captured together as :names
        rule(:sf_names) { (character | number).repeat(1).as(:names) }
        rule(:sf_names?) { sf_names.maybe }
        # complete subfield reference: the marker followed by one code
        rule(:subfield) { sf_indicator >> sf_name }

      end

    end
  end
end
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+
3
+ require 'parslet'
4
+
5
+ require_relative 'basic_parser'
6
+ require_relative 'marc_rules'
7
+
8
module Libis
  module Tools
    module Metadata

      # Parslet grammar for a MARC field selector.
      #
      # The selector is the literal 'MARC' followed by a tag (captured as :tag), the two optional
      # indicators (captured as :ind1 and :ind2 by the +indicators+ rule) and an optional subfield
      # reference (captured as :subfield). Optional whitespace (+spaces?+, not defined in this
      # class) may separate the parts.
      #
      # noinspection RubyResolve
      class MarcSelectParser < Libis::Tools::Metadata::BasicParser
        include Libis::Tools::Metadata::MarcRules

        root(:MARC)

        rule(:MARC) do
          str('MARC') >> spaces? >>
            tag.as(:tag) >> spaces? >>
            indicators >> spaces? >>
            subfield.maybe.as(:subfield)
        end

        # subfield
        # (disabled alternative rules, kept for reference)
        # rule(:sf_condition) { sf_indicator >> sf_names >> (space >> sf_names).repeat }
        # rule(:sf_names) { sf_name.repeat(1) }
      end

    end
  end
end
@@ -0,0 +1,21 @@
1
class Parslet::Pattern

  # Hash matcher with support for optional pattern keys (reopens Parslet::Pattern).
  #
  # A pattern key that ends in '?' (e.g. +:postfix?+ or +'postfix?'+) is optional: it is looked
  # up in +tree+ without the trailing '?'. When the tree holds that key its value must match the
  # pattern value; when it does not, the key is simply skipped. Keys without a trailing '?' are
  # mandatory.
  #
  # Earlier behaviour returned +true+ for the whole hash as soon as one optional key was absent,
  # which skipped every remaining pattern key and accepted trees carrying keys the pattern does
  # not know. Now all remaining keys are still checked and every key of +tree+ has to be
  # accounted for by the pattern.
  #
  # @param tree [Hash] the (sub)tree to match
  # @param exp [Hash] the pattern; keys may carry a trailing '?' to mark them optional
  # @param bindings [Hash] passed through to +element_match+ for the nested values
  # @return [Boolean] true if +tree+ matches +exp+
  def element_match_hash(tree, exp, bindings)
    # A tree with more keys than the pattern can never be fully covered by it.
    return false if exp.size < tree.size

    matched_keys = []
    exp.each do |expected_key, expected_value|
      if expected_key.to_s =~ /^(.*)\?$/
        # strip the '?' marker, keeping the key type (Symbol or String) intact
        expected_key = expected_key.is_a?(Symbol) ? $1.to_sym : $1
        # an absent optional key is fine; carry on with the remaining pattern keys
        next unless tree.has_key? expected_key
      end

      return false unless tree.has_key? expected_key

      # Recurse into the value and stop early on failure
      value = tree[expected_key]
      return false unless element_match(value, expected_value, bindings)

      matched_keys << expected_key
    end

    # Reject trees that carry keys the pattern did not account for.
    (tree.keys - matched_keys).empty?
  end

end
@@ -0,0 +1,71 @@
1
+ # encoding: utf-8
2
+
3
+ require 'parslet'
4
+
5
+ require_relative 'basic_parser'
6
+
7
module Libis
  module Tools
    module Metadata

      # Grammar and matcher for subfield selection criteria.
      #
      # The grammar accepts one or more selections separated by +spaces+. A selection consists of
      # an optional list of codes that must all be present (:must), optionally followed by a
      # one-of group (:one_of) or an only-one-of group (:only_one_of), optionally followed by a
      # minus and a negated part of the same shape (:not).
      #
      # Judging from #selection_to_s, a one-of group is written as '(abc)' and an only-one-of
      # group as '{abc}'. The atoms used by the rules (spaces, minus, lrparen, rrparen, lcparen,
      # rcparen, character, number) are not defined in this class and are presumably inherited
      # from BasicParser -- verify there.
      #
      # noinspection RubyResolve
      class SubfieldCriteriaParser < Libis::Tools::Metadata::BasicParser

        root(:criteria)

        rule(:criteria) { selection >> (spaces >> selection).repeat }

        rule(:selection) { must >> must_not.maybe }

        rule(:must) { names.as(:must).maybe >> (one_of | only_one_of).maybe }
        rule(:must_not) { minus >> must.as(:not) }

        rule(:one_of) { lrparen >> names.as(:one_of) >> rrparen }
        rule(:only_one_of) { lcparen >> names.as(:only_one_of) >> rcparen }

        rule(:names) { (character | number).repeat(1) }

        # Convert a parse tree back into its textual form.
        #
        # @param criteria [Array<Hash>, Hash, Object] parse tree: a list of selections or one selection
        # @return [String, Object] the selections joined with a single space; anything that is
        #   neither an Array nor a Hash is returned unchanged
        def criteria_to_s(criteria)
          case criteria
          when Array
            # leave as is
          when Hash
            criteria = [criteria]
          else
            return criteria
          end
          criteria.map { |selection| selection_to_s(selection) }.join(' ')
        end

        # Convert a single selection back into its textual form.
        #
        # @param selection [Hash, Object] selection with optional :must, :one_of, :only_one_of and :not entries
        # @return [String, Object] textual form; a non-Hash argument is returned unchanged
        def selection_to_s(selection)
          return selection unless selection.is_a? Hash
          result = "#{selection[:must]}"
          result += "(#{selection[:one_of]})" if selection[:one_of]
          result += "{#{selection[:only_one_of]}}" if selection[:only_one_of]
          result += "-#{selection_to_s(selection[:not])}" if selection[:not]
          result
        end

        # Parse the criteria and test whether at least one of its selections matches.
        #
        # @param criteria [String] the criteria text
        # @param data [Array<String>] the subfield codes that are present
        # @return [Boolean] true if any selection matches, or if parsing yields a plain String
        # @raise [Parslet::ParseFailed] if the criteria text cannot be parsed
        def match_criteria(criteria, data)
          # parse with this instance; calling +new+ on an instance raised NoMethodError
          tree = parse(criteria)
          return true if tree.is_a? String
          tree = [tree] unless tree.is_a? Array
          tree.any? { |selection| match_selection(selection, data) }
        end

        # Test a single selection against the subfield codes that are present.
        #
        # The selection matches when
        # * every :must code is present,
        # * at least one :one_of code is present (if the group is given),
        # * exactly one :only_one_of code is present (if the group is given), and
        # * the :not part (if given) does not match.
        #
        # @param selection [Hash] selection as produced by the grammar
        # @param data [Array<String>] the subfield codes that are present
        # @return [Boolean] true if the selection matches
        def match_selection(selection, data)
          must_match = selection[:must].to_s.split('')
          # a set difference also copes with a code that is listed twice
          return false unless (must_match - data).empty?

          one_of = selection[:one_of].to_s.split('')
          return false unless one_of.empty? || (one_of & data).any?

          only_one_of = selection[:only_one_of].to_s.split('')
          # exactly one code of the group has to be present (the test used to be inverted)
          return false unless only_one_of.empty? || (only_one_of & data).size == 1

          return false if selection[:not] && match_selection(selection[:not], data)
          true
        end

      end

    end
  end
end
@@ -0,0 +1,12 @@
1
module Libis
  module Tools
    module Metadata

      # Register the metadata parsers for lazy loading: each file is only required the first
      # time its constant is referenced.
      {
        BasicParser: 'basic_parser',
        DublinCoreParser: 'dublin_core_parser',
        Marc21Parser: 'marc21_parser',
        SubfieldCriteriaParser: 'subfield_criteria_parser'
      }.each do |parser_name, file_name|
        autoload parser_name, "libis/tools/metadata/parser/#{file_name}"
      end

    end
  end
end
@@ -2,6 +2,8 @@
2
2
 
3
3
  require 'libis/tools/assert'
4
4
 
5
+ require_relative 'parser/subfield_criteria_parser'
6
+
5
7
  module Libis
6
8
  module Tools
7
9
  module Metadata
@@ -11,13 +13,17 @@ module Libis
11
13
  attr_reader :tag
12
14
  attr_reader :ind1
13
15
  attr_reader :ind2
14
- attr_reader :subfield
16
+ attr_reader :subfield_data
15
17
 
16
- def initialize(tag, ind1, ind2, subfield = {})
18
+ def initialize(tag, ind1, ind2)
17
19
  @tag = tag
18
- @ind1 = ind1 || ' '
19
- @ind2 = ind2 || ' '
20
- @subfield = subfield || {}
20
+ @ind1 = ind1 || ''
21
+ @ind2 = ind2 || ''
22
+ @subfield_data = Hash.new { |h, k| h[k] = Array.new }
23
+ end
24
+
25
+ def add_subfield(name, value)
26
+ @subfield_data[name] << value
21
27
  end
22
28
 
23
29
  # dump the contents
@@ -25,7 +31,7 @@ module Libis
25
31
  # @return [String] debug output to inspect the contents of the VarField
26
32
  def dump
27
33
  output = "#{@tag}:#{@ind1}:#{@ind2}:\n"
28
- @subfield.each { |s, t| output += "\t#{s}:#{t}\n" }
34
+ @subfield_data.each { |s, t| output += "\t#{s}:#{t}\n" }
29
35
  output
30
36
  end
31
37
 
@@ -34,7 +40,7 @@ module Libis
34
40
  # @return [String] debug output to inspect the contents of the VarField - Single line version
35
41
  def dump_line
36
42
  output = "#{@tag}:#{@ind1}:#{@ind2}:"
37
- @subfield.each { |s, t| output += "$#{s}#{t}" }
43
+ @subfield_data.each { |s, t| output += "$#{s}#{t}" }
38
44
  output
39
45
  end
40
46
 
@@ -42,57 +48,57 @@ module Libis
42
48
  #
43
49
  # @return [Array] a list of all subfield codes
44
50
  def keys
45
- @subfield.keys
51
+ @subfield_data.keys
46
52
  end
47
53
 
48
54
  # get the first (or only) subfield value for the given code
49
55
  #
50
56
  # @return [String] the first or only entry of a subfield or nil if not present
51
57
  # @param s [Character] the subfield code
52
- def field(s)
53
- field_array(s).first
58
+ def subfield(s)
59
+ subfield_array(s).first
54
60
  end
55
61
 
56
62
  # get a list of all subfield values for a given code
57
63
  #
58
64
  # @return [Array] all the entries of a repeatable subfield
59
65
  # @param s [Character] the subfield code
60
- def field_array(s)
66
+ def subfield_array(s)
61
67
  assert(s.is_a?(String) && (s =~ /^[\da-z]$/) == 0, 'method expects a lower case alphanumerical char')
62
- @subfield.has_key?(s) ? @subfield[s].dup : []
68
+ @subfield_data.has_key?(s) ? @subfield_data[s].dup : []
63
69
  end
64
70
 
65
- # get a list of the first subfield value for all the codes in the given string
71
+ # get a list of the first subfield values for all the codes in the given string
66
72
  #
67
73
  # @return [Array] list of the first or only entries of all subfield codes in the input string
68
- # @param s [String] subfield code specification (see match_fieldspec?)
74
+ # @param s [String] subfield code specification (see match)
69
75
  #
70
- # The subfield codes are cleaned and sorted first (see fieldspec_to_sorted_array)
71
- def fields(s)
76
+ # The subfield codes are cleaned (see criteria_to_array)
77
+ def subfields(s)
72
78
  assert(s.is_a?(String), 'method expects a string')
73
- return [] unless (match_array = match_fieldspec?(s))
74
- fieldspec_to_array(match_array.join(' ')).collect { |i| send(:field, i) }.flatten.compact
79
+ return [] unless (match_array = match(s))
80
+ criteria_to_array(match_array.join(' ')).collect { |i| send(:subfield, i) }.flatten.compact
75
81
  end
76
82
 
77
83
  # get a list of all the subfield values for all the codes in the given string
78
84
  #
79
85
  # @return [Array] list of the all the entries of all subfield codes in the input string
80
- # @param s [String] subfield code specification (see match_fieldspec?)
86
+ # @param s [String] subfield code criteria (see match)
81
87
  #
82
- # The subfield codes are cleaned and sorted first (see fieldspec_to_sorted_array)
88
+ # The subfield codes are cleaned (see criteria_to_array)
83
89
 
84
- def fields_array(s)
90
+ def subfields_array(s)
85
91
  assert(s.is_a?(String), 'method expects a string')
86
- return [] unless (match_array = match_fieldspec?(s))
87
- fieldspec_to_array(match_array.join(' ')).collect { |i| send(:field_array, i) }.flatten.compact
92
+ return [] unless (match_array = match(s))
93
+ criteria_to_array(match_array.join(' ')).collect { |i| send(:subfield_array, i) }.flatten.compact
88
94
  end
89
95
 
90
- # check if the current VarField matches the given field specification.
96
+ # check if the current VarField matches the given subfield criteria.
91
97
  #
92
- # @return [String] The matching part(s) of the specification or nil if no match
93
- # @param fieldspec [String] field specification: sequence of alternative set of subfield codes that should-shouldn't be present
98
+ # @return [String] The matching part(s) of the criteria or nil if no match
99
+ # @param criteria [String] subfield criteria: sequence of alternative set of subfield codes that should-shouldn't be present
94
100
  #
95
- # The fieldspec consists of groups of characters. At least one of these groups should match for the test to succeed
101
+ # The subfield criteria consists of groups of characters. At least one of these groups should match for the test to succeed
96
102
  # Within the group sets of codes may be divided by a hyphen (-). The first set of codes must all be present;
97
103
  # the second set of codes must all <b>not</b> be present. Either set may be empty.
98
104
  #
@@ -123,30 +129,34 @@ module Libis
123
129
  # '$c...$d...' => nil
124
130
  # '$b...$c...$d...' => nil
125
131
  # '$a...$b...$c...$d...' => nil
126
- def match_fieldspec?(fieldspec)
127
- return [] if fieldspec.empty?
128
- result = fieldspec.split.collect { |fs|
129
- fa = fs.split '-'
130
- assert(fa.size <= 2, 'more than one "-" is not allowed in a fieldspec')
131
- must_match = (fa[0] || '').split ''
132
- must_not_match = (fa[1] || '').split ''
133
- next unless (must_match == (must_match & keys)) && (must_not_match & keys).empty?
134
- fs
135
- }.compact
136
- return nil if result.empty?
137
- result
132
+ def match(criteria)
133
+ begin
134
+ parser = Libis::Tools::Metadata::SubfieldCriteriaParser.new
135
+ tree = parser.parse(criteria)
136
+ return [] if tree.is_a? String
137
+ tree = [tree] unless tree.is_a? Array
138
+ result = tree.map do |selection|
139
+ next unless parser.match_selection(selection, keys)
140
+ parser.selection_to_s(selection)
141
+ end.compact
142
+ return nil if result.empty?
143
+ result
144
+ rescue Parslet::ParseFailed => failure
145
+ failure.cause.set_label(criteria)
146
+ raise failure
147
+ end
138
148
  end
139
149
 
140
150
  private
141
151
 
142
152
  # @return [Array] cleaned up version of the input string
143
- # @param fieldspec [String] subfield code specification
153
+ # @param subfields [String] subfield code specification
144
154
  # cleans the subfield code specification and splits it into an array of characters
145
155
  # Duplicates will be removed from the array and the order will be untouched.
146
- def fieldspec_to_array(fieldspec)
156
+ def criteria_to_array(subfields)
147
157
 
148
158
  # note that we remove the '-xxx' part as it is only required for matching
149
- fieldspec.gsub(/ |-\w*/, '').split('').uniq
159
+ subfields.gsub(/ |-\w*/, '').split('').uniq
150
160
  end
151
161
 
152
162
  def sort_helper(x)
@@ -186,7 +196,7 @@ module Libis
186
196
  # # equivalent to: t.fields_array('9ab')
187
197
  #
188
198
  # Note that it is not possible to use a fieldspec for the sequence of subfield codes. Spaces and '-' are not allowed
189
- # in method calls. If you want this, use the #field(s) and #field(s)_array methods.
199
+ # in method calls. If you want this, use the #subfield(s) and #subfield(s)_array methods.
190
200
  #
191
201
  def method_missing(name, *args)
192
202
  operation, subfields = name.to_s.split('_')
@@ -197,15 +207,15 @@ module Libis
197
207
  case operation
198
208
  when 'f'
199
209
  if subfields.size > 1
200
- operation = :fields
210
+ operation = :subfields
201
211
  else
202
- operation = :field
212
+ operation = :subfield
203
213
  end
204
214
  when 'a'
205
215
  if subfields.size > 1
206
- operation = :fields_array
216
+ operation = :subfields_array
207
217
  else
208
- operation = :field_array
218
+ operation = :subfield_array
209
219
  end
210
220
  else
211
221
  throw "Unknown method invocation: '#{name}' with: #{args}"
@@ -73,6 +73,10 @@ module Libis
73
73
  'float'
74
74
  when DateTime, Date, Time
75
75
  'datetime'
76
+ when Array
77
+ 'array'
78
+ when Hash
79
+ 'hash'
76
80
  else
77
81
  send(:default).class.name.downcase
78
82
  end
@@ -95,9 +99,17 @@ module Libis
95
99
  when 'datetime'
96
100
  return v.to_datetime if v.respond_to? :to_datetime
97
101
  return DateTime.parse(v)
102
+ when 'array'
103
+ return v if v.is_a?(Array)
104
+ return v.split(/[,;|\s]+/) if v.is_a?(String)
105
+ return v.to_a if v.respond_to?(:to_a)
106
+ when 'hash'
107
+ return v when v.is_a?(Hash)
108
+ return Hash[(0...v.size).zip(v)] when v.is_a?(Array)
98
109
  else
99
110
  raise RuntimeError, "Datatype not supported: '#{dtype}'"
100
111
  end
112
+ nil
101
113
  end
102
114
 
103
115
  def check_constraint(v, constraint = nil)
@@ -145,8 +157,6 @@ module Libis
145
157
  end
146
158
  end
147
159
 
148
- protected
149
-
150
160
  def parameters
151
161
  @parameters ||= Hash.new
152
162
  end