libis-metadata 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rspec +3 -0
- data/.travis.yml +5 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/exe/metadata +5 -0
- data/lib/libis/metadata/cli/cli_downloader.rb +182 -0
- data/lib/libis/metadata/cli/cli_helper.rb +74 -0
- data/lib/libis/metadata/command_line.rb +25 -0
- data/lib/libis/metadata/downloader.rb +117 -0
- data/lib/libis/metadata/dublin_core_record.rb +115 -0
- data/lib/libis/metadata/field_format.rb +119 -0
- data/lib/libis/metadata/fix_field.rb +33 -0
- data/lib/libis/metadata/mapper.rb +80 -0
- data/lib/libis/metadata/mappers/flandrica.rb +76 -0
- data/lib/libis/metadata/mappers/kuleuven.rb +1929 -0
- data/lib/libis/metadata/mappers/scope.rb +46 -0
- data/lib/libis/metadata/marc21_record.rb +49 -0
- data/lib/libis/metadata/marc_record.rb +285 -0
- data/lib/libis/metadata/parser/basic_parser.rb +116 -0
- data/lib/libis/metadata/parser/dublin_core_parser.rb +35 -0
- data/lib/libis/metadata/parser/marc21_parser.rb +205 -0
- data/lib/libis/metadata/parser/marc_format_parser.rb +51 -0
- data/lib/libis/metadata/parser/marc_rules.rb +34 -0
- data/lib/libis/metadata/parser/marc_select_parser.rb +24 -0
- data/lib/libis/metadata/parser/patch.rb +22 -0
- data/lib/libis/metadata/parser/subfield_criteria_parser.rb +70 -0
- data/lib/libis/metadata/parsers.rb +12 -0
- data/lib/libis/metadata/sharepoint_mapping.rb +119 -0
- data/lib/libis/metadata/sharepoint_record.rb +262 -0
- data/lib/libis/metadata/var_field.rb +242 -0
- data/lib/libis/metadata/version.rb +5 -0
- data/lib/libis/metadata.rb +25 -0
- data/lib/libis-metadata.rb +1 -0
- data/metadata.gemspec +39 -0
- metadata +266 -0
@@ -0,0 +1,12 @@
|
|
1
|
+
module Libis
  module Metadata
    # Namespace for the metadata parsers. Each parser class is registered with +autoload+, so its
    # source file is only loaded the first time the constant is referenced.
    module Parser

      # Parser constant => file name below 'libis/metadata/parser/'.
      {
          BasicParser: 'basic_parser',
          DublinCoreParser: 'dublin_core_parser',
          Marc21Parser: 'marc21_parser',
          SubfieldCriteriaParser: 'subfield_criteria_parser'
      }.each do |parser_class, file_name|
        autoload parser_class, "libis/metadata/parser/#{file_name}"
      end

    end
  end
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require 'csv'
|
4
|
+
require 'yaml'
|
5
|
+
|
6
|
+
require 'libis/tools/extend/hash'
|
7
|
+
|
8
|
+
module Libis
|
9
|
+
module Metadata
|
10
|
+
|
11
|
+
# Copy of the old Sharepoint mapping class. Needs inspection and probably a major update.
|
12
|
+
class SharepointMapping < Hash

  # Load the mapping table from a CSV file.
  #
  # The first line of the file is treated as a header row. Rows without a value in the second column
  # are skipped. The columns of every other row are read by position:
  # 0 = display ("fancy") name, 1 = label (used as hash key, converted to a Symbol),
  # 2 = Dublin Core tag specification, 3 = database column, 4 = database datatype,
  # 5 = database value mask.
  #
  # Each label maps to a Hash that always contains :db_datatype and :db_valuemask and, when the
  # row supplies them, :fancy_name, :db_column, :dc_prefix, :dc_tag and :dc_postfix.
  #
  # @param [String] mapping_file path of the CSV file to read
  def initialize(mapping_file)
    super nil

    CSV.foreach(mapping_file, headers: true, skip_blanks: true) do |row|
      next unless row[1]

      # compensation for bug in library that reads the Excel data
      0.upto(5) { |i| row[i] = row[i].gsub(/_x005F(_x[0-9a-fA-F]{4}_)/, '\1') if row[i] }

      self[row[1].strip.to_sym] = row_to_mapping(row)
    end

    # NOTE(review): every instantiation writes the parsed table to 'mapping.yml' in the current
    # working directory. Kept because callers may rely on the file - confirm it is still wanted.
    File.open('mapping.yml', 'wt') { |fp| fp.puts to_yaml }
  end

  # @param label key of the mapping entry
  # @return the fancy name registered for the label, or the label itself if there is none
  def name(label)
    lookup(label, :fancy_name) || label
  end

  # @param label key of the mapping entry
  # @return [String] the label, followed by the fancy name between parentheses when one is registered
  def fancy_label(label)
    fancy_name = lookup(label, :fancy_name)
    fancy_name ? "#{label}(#{fancy_name})" : label.to_s
  end

  # @param label key of the mapping entry
  # @return [String, nil] the Dublin Core tag ('dc:...' or 'dcterms:...') for the label, if any
  def dc_tag(label)
    lookup(label, :dc_tag)
  end

  # @param label key of the mapping entry
  # @return [String, nil] the text that was quoted in front of the Dublin Core tag, if any
  def dc_prefix(label)
    lookup(label, :dc_prefix)
  end

  # @param label key of the mapping entry
  # @return [String, nil] the text that was quoted behind the Dublin Core tag, if any
  def dc_postfix(label)
    lookup(label, :dc_postfix)
  end

  # @param label key of the mapping entry
  # @return [String, nil] the database column for the label, if any
  def db_column(label)
    lookup(label, :db_column)
  end

  # Fill the value into the value mask of the label.
  #
  # @param label key of the mapping entry
  # @param value value to insert; converted with #to_s
  # @return [String, nil] the mask with every '@@' replaced by the value; nil for an unknown label
  def db_value(label, value)
    mapping = self[label]
    return nil unless mapping
    # Block form on purpose: the value is inserted literally. With a String replacement, gsub
    # interprets backslash sequences in the value (such as '\0' or '\\') as back-references.
    mapping[:db_valuemask].gsub('@@') { value.to_s }
  end

  private

  # Look up a single attribute of the mapping entry for a label.
  # Returns the attribute value, or the (falsy) entry itself when the label is unknown.
  def lookup(label, attribute)
    entry = self[label]
    entry ? entry[attribute] : entry
  end

  # Convert one CSV row into the attribute Hash stored for its label.
  def row_to_mapping(row)
    mapping = {}
    mapping[:fancy_name] = row[0].strip if row[0]
    mapping[:db_column] = row[3].strip if row[3]
    mapping[:db_datatype] = row[4] ? row[4].strip.upcase.to_sym : :STRING
    # without an explicit mask, string values get quoted and all other datatypes are inserted as is
    mapping[:db_valuemask] = row[5] || (mapping[:db_datatype] == :STRING ? "'@@'" : '@@')
    apply_dc_tag(mapping, row[2] ? row[2].strip : '')
    mapping
  end

  # Parse a Dublin Core specification of the form  "prefix" <dc:tag> "postfix"  (prefix and postfix
  # are optional) and store the parts it contains in the mapping.
  # A tag with a dot, e.g. <dc:date.created>, becomes the dcterms tag of the part behind the dot.
  def apply_dc_tag(mapping, spec)
    if (found = spec.match(/^\s*"(.*)"\s*(<.*)$/))
      mapping[:dc_prefix] = found[1]
      spec = found[2]
    end

    if (found = spec.match(/^\s*<dc:[^.]+\.([^.>]+)>(.*)$/))
      mapping[:dc_tag] = "dcterms:#{found[1]}"
      spec = found[2]
    elsif (found = spec.match(/^\s*<dc:([^.>]+)>(.*)$/))
      mapping[:dc_tag] = "dc:#{found[1]}"
      spec = found[2]
    end

    found = spec.match(/^\s*"(.*)"\s*$/)
    mapping[:dc_postfix] = found[1] if found
  end

end
|
117
|
+
|
118
|
+
end
|
119
|
+
end
|
@@ -0,0 +1,262 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
require 'libis/tools/extend/hash'
|
6
|
+
require 'libis/tools/xml_document'
|
7
|
+
|
8
|
+
module Libis
|
9
|
+
module Metadata
|
10
|
+
|
11
|
+
# noinspection RubyTooManyMethodsInspection
|
12
|
+
|
13
|
+
# Copy of the old SharepointRecord class. Needs inspection and probably a major update.
|
14
|
+
# A single Sharepoint item, stored as a Hash of field label (Symbol) => value.
# Besides the 'ows_...' fields the hash holds the keys :label_prefix and, when set by the caller, :index.
class SharepointRecord < Hash

  # Escaping rules that URI.escape / URI.unescape applied before they were removed in Ruby 3.0.
  URI_PARSER = URI::RFC2396_Parser.new

  # Ordered content type patterns and the simplified type each one stands for.
  CONTENT_TYPES = [
      [/^Archief/i, :archive],
      [/^Bestanddeel \(folder\)/i, :map],
      [/^Bestanddeel of stuk \(document\)/i, :file],
      [/^Meervoudige beschrijving \(folder\)/i, :mmap],
      [/^Meervoudige beschrijving \(document\)/i, :mfile],
      [/^Tussenniveau/i, :map],
      [/^Film/i, :file],
      [/^Object/i, :file],
      [/^Document/i, :file]
  ].freeze

  # Single character codes for the simplified content types.
  CONTENT_CODES = {archive: 'a', map: 'm', file: 'f', mmap: 'v', mfile: '<', unknown: '-'}.freeze

  # Object that places this record in a hierarchy. #ingest_model and #accessright_model expect it
  # to respond to #parent, and the parent to respond to #content (the parent's record).
  attr_accessor :node

  # Create an empty record with an empty label prefix and no node.
  def initialize
    super nil
    @node = nil
    self[:label_prefix] = ''
  end

  # @return the prefix that #title puts in front of the label
  def label_prefix
    self[:label_prefix]
  end

  # @param value the prefix that #title should put in front of the label
  def label_prefix=(value)
    self[:label_prefix] = value
  end

  # @return [String] the first value present of ows_Title1, ows_BaseName and the file name
  def label
    (self[:ows_Title1] || self[:ows_BaseName] || file_name).to_s
  end

  # @return [String] label prefix and label, separated by a space
  def title
    # interpolation instead of String#+ so that a nil prefix does not raise
    "#{self[:label_prefix]} #{label}"
  end

  # @return the value of the ows_ContentType field
  def content_type
    self[:ows_ContentType]
  end

  # @return the value of the ows_FileLeafRef field
  def file_name
    self[:ows_FileLeafRef]
  end

  # @return the value of the ows_FileRef field
  def file_path
    self[:ows_FileRef]
  end

  # @return the value of the ows_FileSizeDisplay field
  def file_size
    self[:ows_FileSizeDisplay]
  end

  # @return [String] the ows_EncodedAbsUrl field, unescaped and escaped again to normalize the encoding
  def url
    # URI.escape and URI.unescape no longer exist since Ruby 3.0; the RFC 2396 parser offers the
    # same operations on old and new Ruby versions.
    URI_PARSER.escape(URI_PARSER.unescape(self[:ows_EncodedAbsUrl]))
  end

  # @return [String, nil] the file path without the leading 'sites/lias/Gedeelde documenten/';
  #   nil if the record has no file path
  def relative_path
    return file_path.gsub(/^sites\/lias\/Gedeelde documenten\//, '') if file_path
    nil
  end

  # @param [String, nil] sub_dir directory to strip from the start of the relative path; a trailing
  #   '/' is added when missing
  # @return [String, nil] the relative path without the leading sub_dir; the plain relative path when
  #   sub_dir is nil; nil if the record has no file path
  def local_path(sub_dir)
    return relative_path unless sub_dir
    sub_dir += '/' unless sub_dir[-1] == '/'
    # the directory name is literal text: escape it so that characters like '+', '(' or '.' in the
    # name are not interpreted as regular expression syntax
    return relative_path.gsub(/^#{Regexp.escape(sub_dir)}/, '') if relative_path
    nil
  end

  # @return [Boolean] true if the simplified content type is :file or :mfile
  def is_file?
    [:file, :mfile].include? simple_content_type
  end

  # @return the value of the ows_Unit_of_description field; truthy when the record is described
  def is_described?
    # Previous, disabled rule:
    #   self[:ows_Title1] and
    #       (self[:ows_Creation_x0020_date_x0028_s_x0029_] or self[:ows_Startdate] or self[:ows_Enddate])
    self[:ows_Unit_of_description]
  end

  # @return [Symbol] :archive, :map, :file, :mmap or :mfile, depending on the first pattern that
  #   matches the content type; :unknown if none matches
  def simple_content_type
    type = content_type
    found = CONTENT_TYPES.find { |pattern, _| pattern === type }
    found ? found.last : :unknown
  end

  # @return [String] two characters: the code of the simplified content type, followed by '*' if the
  #   record is described and by a space if it is not
  def content_code
    CONTENT_CODES.fetch(simple_content_type, ' ') + (is_described? ? '*' : ' ')
  end

  # @return the ows_Ingestmodel field; without it, the ingest model of the parent node's record;
  #   nil if neither is available
  def ingest_model
    self[:ows_Ingestmodel] || parent_value(:ingest_model)
  end

  # @return the ows_Access_x0020_rights_x0020_model field; without it, the access right model of the
  #   parent node's record; nil if neither is available
  def accessright_model
    self[:ows_Access_x0020_rights_x0020_model] || parent_value(:accessright_model)
  end

  # @return [SharepointRecord] the record itself
  def to_raw
    self
  end

  # Export all fields as child elements of a 'record' root element; labels become element names.
  #
  # @return [Libis::Tools::XmlDocument] the new document
  def to_xml
    xml_doc = Libis::Tools::XmlDocument.new
    xml_doc.root = xml_doc.create_node('record')

    each do |label, value|
      next if label == :node
      #noinspection RubyResolve
      xml_doc.root << xml_doc.create_text_node(label.to_s, value.to_s)
    end

    xml_doc
  end

  # Build a record from an XML node: each child element becomes a field, with the element name as
  # label and the element content as value.
  #
  # @param xml_node node responding to #element_children
  # @return [SharepointRecord] the new record
  def self.from_xml(xml_node)
    record = new
    xml_node.element_children.each { |child| record[child.name.to_sym] = child.content }
    record
  end

  # Export the fields that have a Dublin Core tag in the mapping as a DC record. The value of each
  # field is wrapped in the prefix and postfix of the mapping. If no dc:title element results, one is
  # added with the ows_BaseName field as content.
  #
  # @param [Libis::Metadata::SharepointMapping] mapping
  # @return [Libis::Tools::XmlDocument, nil] the new document; nil if mapping is not a Hash
  def to_dc(mapping)
    return nil unless mapping && mapping.is_a?(Hash)

    xml_doc = Libis::Tools::XmlDocument.new

    #noinspection RubyStringKeysInHashInspection
    xml_doc.root = xml_doc.create_node(
        'record',
        namespaces: {
            'dc' => 'http://purl.org/dc/elements/1.1/',
            'xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
            'dcterms' => 'http://purl.org/dc/terms/'})

    each do |label, value|
      dc_tag = mapping.dc_tag(label)
      next unless dc_tag
      dc_value = (mapping.dc_prefix(label) || '') + value.to_s + (mapping.dc_postfix(label) || '')
      #noinspection RubyResolve
      xml_doc.root << xml_doc.create_text_node(dc_tag, dc_value)
    end

    if xml_doc.xpath('//dc:title').size == 0
      xml_doc.root << xml_doc.create_text_node('dc:title', self[:ows_BaseName])
    end

    xml_doc
  end

  # Build an INSERT statement for the fields that have a database column in the mapping. Fields whose
  # masked value is missing or an empty quoted string ('') are left out. A line break is inserted
  # before every tenth column and value. The table name is left as the placeholder @TABLE_NAME@.
  #
  # NOTE(review): relies on String#escape_for_sql, which is not defined in this file - presumably an
  # extension loaded elsewhere; confirm.
  #
  # @param [Libis::Metadata::SharepointMapping] mapping
  # @return [String] the SQL statement
  def to_sql(mapping)
    sql_fields = []
    sql_values = []

    each do |label, value|
      db_column = mapping.db_column(label)
      next unless db_column
      db_value = mapping.db_value(label, value)
      next unless db_value && db_value != "''"
      sql_fields << db_column
      sql_values << db_value.escape_for_sql
    end

    wrap = lambda do |items|
      items.each_with_index.map { |item, index| index % 10 == 0 ? "\n " + item : item }
    end

    'INSERT INTO @TABLE_NAME@ (' + wrap.call(sql_fields).join(',') + ")\n VALUES (" + wrap.call(sql_values).join(',') + ');'
  end

  # Save the Dublin Core export of the record as 'dc_<index>.xml' in the given directory.
  #
  # @param [String] dir target directory
  # @param [Libis::Metadata::SharepointMapping] mapping
  # @return [String] path of the file
  def create_dc(dir, mapping)
    xml_doc = to_dc mapping
    dc_file = "#{dir}/dc_#{self[:index].to_s}.xml"
    xml_doc.save dc_file
    dc_file
  end

  # @return [String] the standard Hash string representation
  def to_s
    super
  end

  # Print a header line with the record's :index, followed by one line per field with the fancy
  # label of the field and its value.
  #
  # @param f output object responding to #printf
  # @param mapping object responding to #fancy_label, e.g. a Libis::Metadata::SharepointMapping
  def print_metadata(f, mapping)
    f.printf "%6d -------------------------------------------------------------------------\n", self[:index].to_i
    each do |label, value|
      next if label == :node
      # next if label == :index
      f.printf " %40s : %s\n", mapping.fancy_label(label), value
    end
  end

  private

  # Value of the given reader on the record of the parent node; nil if there is no node, no parent
  # or no parent record.
  def parent_value(reader)
    parent = node && node.parent
    parent_record = parent && parent.content
    parent_record ? parent_record.public_send(reader) : nil
  end

end
|
260
|
+
|
261
|
+
end
|
262
|
+
end
|
@@ -0,0 +1,242 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require 'libis/tools/assert'
|
4
|
+
|
5
|
+
require_relative 'parser/subfield_criteria_parser'
|
6
|
+
|
7
|
+
module Libis
|
8
|
+
module Tools
|
9
|
+
module Metadata
|
10
|
+
|
11
|
+
# Helper class implementing a variable field for MARC
|
12
|
+
class VarField

  # Names handled by #method_missing: an optional operation ('f' or 'a'), an underscore and one or
  # more subfield codes (digits or lower case letters).
  ACCESSOR_NAME = /\A([fa]?)_([\da-z]+)\z/

  attr_reader :tag
  attr_reader :ind1
  attr_reader :ind2
  attr_reader :subfield_data

  # Create new variable field with given tag and indicators
  # @param [String] tag tag
  # @param [String] ind1 first indicator. nil will be translated into empty string.
  # @param [String] ind2 second indicator. nil will be translated into empty string.
  def initialize(tag, ind1, ind2)
    @tag = tag
    @ind1 = ind1 || ''
    @ind2 = ind2 || ''
    # every subfield code gets its own list of values on first access
    @subfield_data = Hash.new { |data, code| data[code] = [] }
  end

  # Add subfield to variable field
  # @param [String] name subfield indicator without '$'
  # @param [String] value content of the subfield
  def add_subfield(name, value)
    @subfield_data[name] << value
  end

  # dump the contents
  #
  # @return [String] debug output to inspect the contents of the VarField: a header line with tag and
  #   indicators, followed by one line per subfield code with the list of its values
  def dump
    @subfield_data.reduce("#{@tag}:#{@ind1}:#{@ind2}:\n") do |text, (code, values)|
      text + "\t#{code}:#{values}\n"
    end
  end

  # dump the contents
  #
  # @return [String] debug output to inspect the contents of the VarField - Single line version
  def dump_line
    @subfield_data.reduce("#{@tag}:#{@ind1}:#{@ind2}:") do |text, (code, values)|
      text + "$#{code}#{values}"
    end
  end

  # list the subfield codes
  #
  # @return [Array] a list of all subfield codes
  def keys
    @subfield_data.keys
  end

  # get the first (or only) subfield value for the given code
  #
  # @return [String] the first or only entry of a subfield or nil if not present
  # @param s [Character] the subfield code
  def subfield(s)
    subfield_array(s).first
  end

  # get a list of all subfield values for a given code
  #
  # NOTE(review): the argument check uses +assert+, which is not defined in this class - presumably
  # provided by 'libis/tools/assert'; confirm.
  #
  # @return [Array] all the entries of a repeatable subfield; a copy, so changes to it do not affect
  #   the field. Empty if the code is not present.
  # @param s [Character] the subfield code
  def subfield_array(s)
    assert(s.is_a?(String) && (s =~ /^[\da-z]$/) == 0, 'method expects a lower case alphanumerical char')
    @subfield_data.key?(s) ? @subfield_data[s].dup : []
  end

  # get a list of the first subfield values for all the codes in the given string
  #
  # @return [Array] list of the first or only entries of all subfield codes in the input string
  # @param s [String] subfield code specification (see match)
  #
  # The subfield codes are cleaned (see criteria_to_array)
  def subfields(s)
    assert(s.is_a?(String), 'method expects a string')
    return [] unless (match_array = match(s))
    criteria_to_array(match_array.join(' ')).map { |code| subfield(code) }.flatten.compact
  end

  # get a list of all the subfield values for all the codes in the given string
  #
  # @return [Array] list of the all the entries of all subfield codes in the input string
  # @param s [String] subfield code criteria (see match)
  #
  # The subfield codes are cleaned (see criteria_to_array)
  def subfields_array(s)
    assert(s.is_a?(String), 'method expects a string')
    return [] unless (match_array = match(s))
    criteria_to_array(match_array.join(' ')).map { |code| subfield_array(code) }.flatten.compact
  end

  # check if the current VarField matches the given subfield criteria.
  #
  # @return [Array<String>, nil] The matching part(s) of the criteria or nil if no match. An empty
  #   array is returned when the parser yields a plain String for the criteria.
  # @param criteria [String] subfield criteria: sequence of alternative set of subfield codes that should-shouldn't be present
  # @raise [Parslet::ParseFailed] if the criteria cannot be parsed
  #
  # The subfield criteria consists of groups of characters. At least one of these groups should match for the test to succeed
  # Within the group sets of codes may be divided by a hyphen (-). The first set of codes must all be present;
  # the second set of codes must all <b>not</b> be present. Either set may be empty.
  #
  # Examples:
  #   'ab'      matches  '$a...$b...'           => ['ab']
  #                      '$a...$b...$c...'      => ['ab']
  #             but not  '$a...'                => nil  # ($b missing)
  #                      '$b...'                => nil  # ($a missing)
  #   'a b'     matches  '$a...'                => ['a']
  #                      '$b...'                => ['b']
  #                      '$a...$b...'           => ['a', 'b']
  #                      '$a...$b...$c...'      => ['a', 'b']
  #             but not  '$c...'                => nil  # ($a or $b must be present)
  #   'abc-d'   matches  '$a...$b...$c...'      => ['abc-d']
  #                      '$a...$b...$c...$e...' => ['abc-d']
  #             but not  '$a...$b...$e...'      => nil  # ($c missing)
  #                      '$a...$b...$c...$d...' => nil  # ($d should not be present)
  #   'a-b b-a' matches  '$a...'                => ['a-b']
  #                      '$a...$c...'           => ['a-b']
  #                      '$b...'                => ['b-a']
  #                      '$b...$c...'           => ['b-a']
  #             but not  '$a...$b...'           => nil
  #   'a-b c-d' matches  '$a...'                => ['a-b']
  #                      '$a...$c...'           => ['a-b', 'c-d']
  #                      '$a...$b...$c...'      => ['c-d']
  #                      '$b...$c...'           => ['c-d']
  #             but not  '$a...$b...'           => nil
  #                      '$c...$d...'           => nil
  #                      '$b...$c...$d...'      => nil
  #                      '$a...$b...$c...$d...' => nil
  def match(criteria)
    parser = Libis::Tools::Metadata::SubfieldCriteriaParser.new
    tree = parser.parse(criteria)
    return [] if tree.is_a? String
    tree = [tree] unless tree.is_a? Array
    result = tree.map do |selection|
      next unless parser.match_selection(selection, keys)
      parser.selection_to_s(selection)
    end.compact
    result.empty? ? nil : result
  rescue Parslet::ParseFailed => failure
    # label the failure with the offending criteria before passing it on
    failure.cause.set_label(criteria)
    raise failure
  end

  private

  # @return [Array] cleaned up version of the input string
  # @param subfields [String] subfield code specification
  # cleans the subfield code specification and splits it into an array of characters
  # Duplicates will be removed from the array and the order will be untouched.
  def criteria_to_array(subfields)
    # note that we remove the '-xxx' part as it is only required for matching
    subfields.gsub(/ |-\w*/, '').split('').uniq
  end

  # Not called from within this class.
  def sort_helper(x)
    # make sure that everything below 'A' is higher than 'z'
    # note that this only works for numbers, but that is fine in our case.
    x < 'A' ? (x.to_i + 123).chr : x
  end

  # implementation for methods for retrieving subfield values
  #
  # The methods start with a single character: the operation
  # 'f' for retrieving only the first occurrence of the subfield
  # 'a' for retrieving all the subfield values for each of the given subfields
  # if omitted, 'f' is assumed
  #
  # Then a '_' acts as a subdivider between the operation and the subfield(s). It must always be present, even
  # if the operation is omitted.
  #
  # The last part is a sequence of subfield codes that should be used for selecting the values. The order in which the
  # subfields are listed is respected in the resulting array of values.
  #
  # Any other method name is passed on to the default handling and raises a NoMethodError.
  # Arguments passed to an accessor are ignored.
  #
  # Examples:
  #
  #   t = VarField.new('100', '', '')
  #   t.add_subfield('a', 'Name');     t.add_subfield('a', 'NickName')
  #   t.add_subfield('b', 'LastName'); t.add_subfield('b', 'MaidenName')
  #   t.add_subfield('c', 'eMail')
  #   t.add_subfield('1', 'Age')
  #   t.add_subfield('9', 'Score')
  #
  #   # >> 100##$aName$aNickName$bLastName$bMaidenName$ceMail$1Age$9Score <<
  #
  #   t._1ab => ['Age', 'Name', 'LastName']
  #   # equivalent to: t.f_1ab or t.subfields('1 a b')
  #
  #   t.a_9ab => ['Score', 'Name', 'NickName', 'LastName', 'MaidenName']
  #   # equivalent to: t.subfields_array('9 a b')
  #
  # Note that it is not possible to use a fieldspec for the sequence of subfield codes. Spaces and '-' are not allowed
  # in method calls. If you want this, use the #subfield(s) and #subfield(s)_array methods.
  #
  # Deliberately not paired with respond_to_missing?: names such as '_dump' match the accessor
  # pattern, and answering true to respond_to?(:_dump) would mislead code that probes for that
  # method (Marshal does).
  def method_missing(name, *args)
    accessor = ACCESSOR_NAME.match(name.to_s)
    return super unless accessor

    all_values = accessor[1] == 'a'
    codes = accessor[2]
    reader = if codes.size > 1
               all_values ? :subfields_array : :subfields
             else
               all_values ? :subfield_array : :subfield
             end
    # convert subfield list to fieldspec
    send(reader, codes.split('').join(' '))
  end

  # Always nil: the field is not convertible to an Array, so Array#flatten and multiple assignment
  # treat it as a single value instead of ending up in #method_missing.
  def to_ary
    nil
  end

end
|
239
|
+
|
240
|
+
end
|
241
|
+
end
|
242
|
+
end
|