libis-metadata 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/.travis.yml +5 -0
  5. data/Gemfile +6 -0
  6. data/LICENSE.txt +21 -0
  7. data/README.md +39 -0
  8. data/Rakefile +6 -0
  9. data/bin/console +14 -0
  10. data/bin/setup +8 -0
  11. data/exe/metadata +5 -0
  12. data/lib/libis/metadata/cli/cli_downloader.rb +182 -0
  13. data/lib/libis/metadata/cli/cli_helper.rb +74 -0
  14. data/lib/libis/metadata/command_line.rb +25 -0
  15. data/lib/libis/metadata/downloader.rb +117 -0
  16. data/lib/libis/metadata/dublin_core_record.rb +115 -0
  17. data/lib/libis/metadata/field_format.rb +119 -0
  18. data/lib/libis/metadata/fix_field.rb +33 -0
  19. data/lib/libis/metadata/mapper.rb +80 -0
  20. data/lib/libis/metadata/mappers/flandrica.rb +76 -0
  21. data/lib/libis/metadata/mappers/kuleuven.rb +1929 -0
  22. data/lib/libis/metadata/mappers/scope.rb +46 -0
  23. data/lib/libis/metadata/marc21_record.rb +49 -0
  24. data/lib/libis/metadata/marc_record.rb +285 -0
  25. data/lib/libis/metadata/parser/basic_parser.rb +116 -0
  26. data/lib/libis/metadata/parser/dublin_core_parser.rb +35 -0
  27. data/lib/libis/metadata/parser/marc21_parser.rb +205 -0
  28. data/lib/libis/metadata/parser/marc_format_parser.rb +51 -0
  29. data/lib/libis/metadata/parser/marc_rules.rb +34 -0
  30. data/lib/libis/metadata/parser/marc_select_parser.rb +24 -0
  31. data/lib/libis/metadata/parser/patch.rb +22 -0
  32. data/lib/libis/metadata/parser/subfield_criteria_parser.rb +70 -0
  33. data/lib/libis/metadata/parsers.rb +12 -0
  34. data/lib/libis/metadata/sharepoint_mapping.rb +119 -0
  35. data/lib/libis/metadata/sharepoint_record.rb +262 -0
  36. data/lib/libis/metadata/var_field.rb +242 -0
  37. data/lib/libis/metadata/version.rb +5 -0
  38. data/lib/libis/metadata.rb +25 -0
  39. data/lib/libis-metadata.rb +1 -0
  40. data/metadata.gemspec +39 -0
  41. metadata +266 -0
@@ -0,0 +1,46 @@
1
+ # encoding: utf-8
2
+
3
+ require 'libis/tools/metadata/dublin_core_record'
4
+ require 'libis/tools/assert'
5
+
6
+ module Libis
7
+ module Tools
8
+ module Metadata
9
+ module Mappers
10
+ # noinspection RubyResolve
11
+
12
+ # Mixin for {::Libis::Tools::Metadata::DublinCoreRecord} to enable conversion of the Scope exported DC record.
13
module Scope

  # Convert a Scope-exported Dublin Core record.
  #
  # A fresh DublinCoreRecord is built from this record's XML. When that copy
  # has an +isPartOf+ element, an +isReferencedBy+ node carrying the same
  # content (typed as +dcterms:URI+) is created and substituted for it.
  #
  # The receiver must itself be a DublinCoreRecord; this is enforced by +assert+.
  #
  # @return [::Libis::Tools::Metadata::DublinCoreRecord] the converted copy
  def to_dc
    assert(self.is_a? Libis::Tools::Metadata::DublinCoreRecord)

    doc = Libis::Tools::Metadata::DublinCoreRecord.new(self.to_xml)

    # Nothing to rewrite when the record has no isPartOf element.
    return doc unless doc.isPartOf

    # Build the isReferencedBy node from the isPartOf content ...
    referenced_by = doc.add_node(
        'isReferencedBy',
        doc.isPartOf.content,
        nil,
        'xsi:type' => 'dcterms:URI'
    )

    # ... and put it in place of the isPartOf node.
    doc.isPartOf.replace referenced_by

    doc
  end

end
42
+
43
+ end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,49 @@
1
+ # coding: utf-8
2
+
3
+ require 'cgi'
4
+
5
+ require_relative 'marc_record'
6
+
7
+ module Libis
8
+ module Metadata
9
+
10
+ # This class implements the missing private method 'get_all_records' to accomodate for the MARC-XML format.
11
+ class Marc21Record < Libis::Metadata::MarcRecord
12
+
13
+ private
14
+
15
+ def get_all_records
16
+
17
+ @all_records.clear
18
+
19
+ @node.xpath('.//leader').each {|f|
20
+ @all_records['LDR'] << FixField.new('LDR', f.content)
21
+ }
22
+
23
+ @node.xpath('.//controlfield').each {|f|
24
+ tag = f['tag']
25
+ tag = '%03d' % tag.to_i if tag.size < 3
26
+ @all_records[tag] << FixField.new(tag, f.content)
27
+ }
28
+
29
+ @node.xpath('.//datafield').each {|v|
30
+
31
+ tag = v['tag']
32
+ tag = '%03d' % tag.to_i if tag.size < 3
33
+
34
+ varfield = VarField.new(tag, v['ind1'].to_s, v['ind2'].to_s)
35
+
36
+ v.xpath('.//subfield').each {|s| varfield.add_subfield(s['code'], s.content)}
37
+
38
+ @all_records[tag] << varfield
39
+
40
+ }
41
+
42
+ @all_records
43
+
44
+ end
45
+
46
+ end
47
+
48
+ end
49
+ end
@@ -0,0 +1,285 @@
1
+ # coding: utf-8
2
+
3
+ require 'set'
4
+ require 'cgi'
5
+
6
+ require 'libis/tools/xml_document'
7
+ require 'libis/tools/assert'
8
+
9
+ require_relative 'fix_field'
10
+ require_relative 'var_field'
11
+ require_relative 'field_format'
12
+
13
+ module Libis
14
+ module Metadata
15
+
16
+ # noinspection RubyTooManyMethodsInspection
17
+
18
+ # Base class for reading MARC based records.
19
+ #
20
+ # For indicator selection: '#' or '' (empty) is wildcard; '_' or ' ' (space) is blank.
21
class MarcRecord

  # Create a new MarcRecord object
  #
  # NOTE: namespaces are removed from the document that owns the node, so the
  # caller's document is modified.
  #
  # @param [XML node] xml_node XML node from Nokogiri or XmlDocument that contains child nodes with the data for
  #   one MARC record.
  def initialize(xml_node)
    @node = xml_node
    @node.document.remove_namespaces!
    # tag => Array of fields; each key gets its own Array on first access.
    @all_records = Hash.new { |h, k| h[k] = Array.new }
  end

  # Access to the XML node that was supplied to the constructor
  # @return [XML node]
  def to_raw
    @node
  end

  # Returns the internal data structure (a Hash) with all the MARC data.
  #
  # The internal structure is a Hash with the tag as key and as value an Array of either FixField or VarField
  # instances. It is filled on first use by the subclass-provided private method +get_all_records+.
  #
  # @return [Hash] internal data structure
  def all
    return @all_records unless @all_records.empty?
    @all_records = get_all_records
  end

  # Iterates over all the MARC fields.
  #
  # If a block is supplied it will be called for each field in the MARC record. The supplied argument will be the
  # FixField or VarField instance for each field.
  #
  # @return [Array] The list of the field data or return values for each block call.
  def each
    all.values.flatten.map do |field|
      block_given? ? yield(field) : field
    end
  end

  # Get all fields matching search criteria.
  #
  # A block with one parameter can be supplied when calling this method. Each time a match is found, the block
  # will be called with the field data as argument and the return value of the block will be added to the method's
  # return value. This could for example be used to narrow the selection of the fields:
  #
  #   # Only select 700 tags where $4 subfield contains 'abc', 'def' or 'xyz'
  #   record.all_tags('700') { |v| v.subfield['4'] =~ /^(abc|def|xyz)$/ ? v : nil }.compact
  #
  # @param [String] tag Tag selection string. Tag name with indicators, '#' for wildcard, '_' for blank. If an
  #   extra subfield name is added, a result will be created for each instance found of that subfield.
  # @param [String] subfields Subfield specification. See FieldFormat class for more info; ignored for controlfields.
  # @param [Proc] select_block block that will be executed once for each field found. The block takes one argument
  #   (the field) and should return true or false. True selects the field, false rejects it.
  # @return [Array] If a block was supplied to the method call, the array will contain the result of the block
  #   for each tag found. Otherwise the array will just contain the data for each matching tag.
  def all_tags(tag, subfields = '', select_block = Proc.new {|_| true})
    # A numeric tag may be followed by indicator 1, indicator 2 and one subfield code;
    # any other tag (e.g. 'LDR') is used verbatim.
    if tag =~ /^\d{3}/
      t, ind1, ind2, subfield = tag[0..2], tag[3], tag[4], tag[5]
    else
      t, ind1, ind2, subfield = tag, nil, nil, nil
    end
    result = get_records(t, ind1, ind2, subfield, subfields, &select_block)
    return result unless block_given?
    result.map { |record| yield record }
  end

  alias_method :each_tag, :all_tags

  # Get all fields matching search criteria.
  # As {#all_tags} but without subfield criteria.
  # @param [String] tag Tag selection string. Tag name with indicators, '#' for wildcard, '_' for blank. If an
  #   extra subfield name is added, a result will be created for each instance found of that subfield.
  # @param [Proc] select_block block that will be executed once for each field found. The block takes one argument
  #   (the field) and should return true or false. True selects the field, false rejects it.
  # @return [Array] If a block was supplied to the method call, the array will contain the result of the block
  #   for each tag found. Otherwise the array will just contain the data for each matching tag.
  def select_fields(tag, select_block = nil, &block)
    all_tags(tag, nil, select_block, &block)
  end

  # Find the first tag matching the criteria.
  #
  # If a block is supplied, it will be called with the found field data. The return value will be whatever the
  # block returns. If no block is supplied, the field data will be returned. If nothing was found, the return
  # value is nil.
  #
  # @param [String] tag Tag selection string. Tag name with indicators, '#' for wildcard, '_' for blank.
  # @param [String] subfields Subfield specification. See FieldFormat class for more info; ignored for controlfields.
  # @return [Object] nil if nothing found; field data or whatever block returns.
  def first_tag(tag, subfields = '')
    result = all_tags(tag, subfields).first
    return nil unless result
    return result unless block_given?
    yield result
  end

  # Find all fields matching the criteria.
  #
  # Without a block the matching subfield values are returned. With a block, the block is called for each value
  # and the method returns true when at least one value was found, false otherwise.
  #
  # @param [String] tag Tag selection string. Tag name with indicators, '#' for wildcard, '_' for blank.
  # @param [String] subfields Subfield specification. See FieldFormat class for more info.
  # @return [Array, Boolean] list of values, or (with a block) whether anything was found
  def all_fields(tag, subfields)
    r = all_tags(tag, subfields).collect { |t| t.subfields_array(subfields) }.flatten.compact
    return r unless block_given?
    r.each { |field| yield field }
    r.size > 0
  end

  # Find the first field matching the criteria
  #
  # Without a block the first value (or nil) is returned. With a block, the block is called with the first value
  # and the method returns true; when nothing was found the block is not called and false is returned.
  #
  # @param (see #all_fields)
  def first_field(tag, subfields)
    result = all_fields(tag, subfields).first
    return result unless block_given?
    return false unless result
    yield result
    true
  end

  # Perform action on each field found. Code block required.
  # @param (see #all_fields)
  def each_field(tag, subfields)
    all_fields(tag, subfields).each do |field|
      yield field
    end
  end

  # Dump content to string.
  # @return [String] concatenation of the +dump+ output of every field
  def marc_dump
    all.values.flatten.map { |record| record.dump }.join
  end

  # Save the current MARC record to file.
  # @param [String] filename name of the file; when nil nothing is written and the XML document is returned
  def save(filename)
    doc = ::Libis::Tools::XmlDocument.new
    doc.root = @node

    return doc unless filename

    doc.save filename, save_with: (::Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS |
                         ::Nokogiri::XML::Node::SaveOptions::AS_XML |
                         ::Nokogiri::XML::Node::SaveOptions::FORMAT
                     )
  end

  # Load XML document from file and create a new {MarcRecord} for it.
  # @param [String] filename name of XML Marc file
  def self.load(filename)
    doc = ::Libis::Tools::XmlDocument.open(filename)
    self.new(doc.root)
  end

  # Load XML document from stream and create a new {MarcRecord} for it.
  # @param [IO,String] io input stream
  def self.read(io)
    # NOTE(review): StringIO is used without an explicit require 'stringio' in this file;
    # presumably it is loaded by a dependency - confirm.
    io = StringIO.new(io) if io.is_a? String
    doc = ::Libis::Tools::XmlDocument.parse(io)
    self.new(doc.root)
  end

  # Dump Marc record in Aleph Sequential format
  #
  # Each line starts with the document number (data of the first '001' field, left-padded with zeros to
  # 9 positions; all zeros when the record has no '001' field). Control fields are written first, followed
  # by the variable fields with their subfields as '$$<code><value>'.
  #
  # @return [String] Aleph sequential output
  def to_aseq
    id_field = first_tag('001')
    doc_number = (id_field ? id_field.datas : '').to_s.rjust(9, '0')

    fix_fields, var_fields = all.values.flatten.partition { |f| f.is_a? Libis::Metadata::FixField }

    lines = fix_fields.map { |f| "#{doc_number} #{f.tag} L #{f.datas}\n" }

    lines += var_fields.map do |f|
      subfields = f.keys.flat_map do |code|
        f.subfield_array(code).map { |value| "$$#{code}#{CGI::unescapeHTML(value)}" }
      end
      "#{doc_number} #{f.tag}#{f.ind1}#{f.ind2} L #{subfields.join}\n"
    end

    lines.join
  end

  protected

  # Formatting helpers: each one builds a FieldFormat from +parts+ with a preset default option.
  # A trailing Hash in +parts+ is taken as options and overrides the preset.

  # Format without preset options.
  def element(*parts)
    opts = options parts
    field_format(opts, *parts)
  end

  # Format with parts joined by a space.
  def list_s(*parts)
    opts = options parts, join: ' '
    field_format(opts, *parts)
  end

  # Format with parts joined by ', '.
  def list_c(*parts)
    opts = options parts, join: ', '
    field_format(opts, *parts)
  end

  # Format with parts joined by ' - '.
  def list_d(*parts)
    opts = options parts, join: ' - '
    field_format(opts, *parts)
  end

  # Format with parts joined by '; '.
  def repeat(*parts)
    opts = options parts, join: '; '
    field_format(opts, *parts)
  end

  # Format with the '()' fix option.
  def opt_r(*parts)
    opts = options parts, fix: '()'
    field_format(opts, *parts)
  end

  # Format with the '[]' fix option.
  def opt_s(*parts)
    opts = options parts, fix: '[]'
    field_format(opts, *parts)
  end

  # Build an ODIS link from the first two (lower-cased) characters of +group+, the +id+ and a +label+ fragment.
  def odis_link(group, id, label)
    "http://www.odis.be/lnk/#{group.downcase[0, 2]}_#{id}\##{label}"
  end

  private

  # Remove a trailing options Hash from +args+ (if any) and merge it over +default+.
  # NOTE: +args+ is modified in place when it ends with a Hash.
  def options(args, default = {})
    default.merge(args.last.is_a?(::Hash) ? args.pop : {})
  end

  # Render +parts+ through FieldFormat using +default_options+ as defaults.
  def field_format(default_options, *parts)
    Libis::Metadata::FieldFormat.new(*parts).add_default_options(default_options).to_s
  end

  # Select the fields stored under +tag+.
  #
  # Control fields (FixField) always match; variable fields must match both indicators
  # ('' or '#' is a wildcard, '_' or ' ' is blank) and the +subfields+ criteria. When a block
  # is given it must additionally return a truthy value for the field.
  #
  # When +subfield+ is given, each matching variable field is expanded into one copy per
  # value of that subfield; control fields and fields without that subfield are dropped.
  #
  # @return [Array] matching fields
  def get_records(tag, ind1 = '', ind2 = '', subfield = nil, subfields = '', &block)

    # Work on copies so the caller's strings are never modified.
    ind1 = (ind1 || '').tr('_', ' ').delete('#')
    ind2 = (ind2 || '').tr('_', ' ').delete('#')
    subfields ||= ''

    found = all.fetch(tag, []).select do |v|
      result = v.is_a?(Libis::Metadata::FixField) ||
          ((ind1.empty? || v.ind1 == ind1) &&
              (ind2.empty? || v.ind2 == ind2) &&
              v.match(subfields)
          )
      result &&= block.call(v) if block
      result
    end

    return found unless subfield

    # duplicate tags for subfield instances
    found.flat_map do |field|
      # control fields carry no subfields
      next [] if field.is_a? Libis::Metadata::FixField
      field.subfield_data.fetch(subfield, []).map do |value|
        single_subfield_copy(field, subfield, value)
      end
    end

  end

  # Build an independent copy of a variable field in which +subfield+ holds only +value+.
  # A new VarField is built instead of using +dup+, because +dup+ would share the subfield
  # data with the original field.
  # NOTE(review): assumes VarField#subfield_data is a Hash of code => Array of values - confirm.
  def single_subfield_copy(field, subfield, value)
    copy = Libis::Metadata::VarField.new(field.tag, field.ind1, field.ind2)
    field.subfield_data.each do |code, values|
      (code == subfield ? [value] : values).each { |v| copy.add_subfield(code, v) }
    end
    copy
  end

end
284
+ end
285
+ end
@@ -0,0 +1,116 @@
1
+ require 'parslet'
2
+ require 'parslet/convenience'
3
+
4
+ module Libis
5
+ module Metadata
6
+ module Parser
7
+ # noinspection RubyResolve
8
+
9
+ # New style parsers and converters for metadata. New, not finished and untested.
10
class BasicParser < Parslet::Parser

  # --- whitespace ---------------------------------------------------------
  rule(:space) { match('\s') }
  rule(:space?) { space.maybe }
  rule(:spaces) { space.repeat(1) }
  rule(:spaces?) { space.repeat }

  # --- digits -------------------------------------------------------------
  rule(:number) { match('[0-9]') }
  rule(:number?) { number.maybe }
  rule(:integer) { number.repeat(1) }

  # --- letters (case-insensitive a-z) -------------------------------------
  rule(:character) { match(/[a-z]/i) }
  rule(:character?) { character.maybe }
  rule(:characters) { character.repeat(1) }

  # --- word characters ----------------------------------------------------
  rule(:wordchar) { match('\w') }

  # --- names: a letter or underscore followed by word characters, repeated -
  rule(:name_string) { ((character | underscore) >> wordchar.repeat).repeat(1) }

  # --- free text: anything that is not a grouping character ----------------
  rule(:other) { not_paren }
  rule(:text) { other.repeat(1) }
  rule(:text?) { text.maybe }

  # --- single special characters ------------------------------------------
  rule(:minus) { str('-') }
  rule(:colon) { str(':') }
  rule(:semicolon) { str(';') }
  rule(:underscore) { str('_') }
  rule(:hashtag) { str('#') }
  rule(:dollar) { str('$') }
  rule(:star) { str('*') }

  # --- grouping characters (brackets and quotes) --------------------------
  rule(:paren) { lparen | rparen }
  rule(:lparen) { lrparen | lsparen | lcparen | squote | dquote }
  rule(:rparen) { rrparen | rsparen | rcparen | squote | dquote }

  rule(:not_paren) { paren.absent? >> any }
  rule(:not_lparen) do
    lrparen.absent? >> lsparen.absent? >> lcparen.absent? >> squote.absent? >> dquote.absent? >> any
  end
  rule(:not_rparen) do
    rrparen.absent? >> rsparen.absent? >> rcparen.absent? >> squote.absent? >> dquote.absent? >> any
  end

  rule(:lrparen) { str('(') }
  rule(:lsparen) { str('[') }
  rule(:lcparen) { str('{') }
  rule(:rrparen) { str(')') }
  rule(:rsparen) { str(']') }
  rule(:rcparen) { str('}') }

  # --- quotes ---------------------------------------------------------------
  rule(:squote) { str("'") }
  rule(:dquote) { str('"') }
  rule(:quote) { squote | dquote }

  rule(:not_squote) { squote.absent? >> any }
  rule(:not_dquote) { dquote.absent? >> any }
  rule(:not_quote) { quote.absent? >> any }

  # Closing counterpart of an opening grouping character.
  # Brackets map to their closing bracket; anything else (e.g. a quote) maps to itself.
  def complement(char)
    case char
    when '(' then ')'
    when '{' then '}'
    when '[' then ']'
    else char
    end
  end

  # Parser for +foo+ wrapped in matching grouping characters.
  # The opening character is tagged :lparen and the matching closing character :rparen.
  #
  # @param foo parser for the wrapped content
  # @param left_paren parser for the accepted opening characters
  def grouped(foo, left_paren = lparen)
    scope do
      left_paren.capture(:paren).as(:lparen) >>
          foo >>
          dynamic { |_, context| str(complement(context.captures[:paren])) }.as(:rparen)
    end
  end

  # Same as {#grouped}, but the opening and closing characters are not tagged.
  def grouped_anonymous(foo, left_paren = lparen)
    scope do
      left_paren.capture(:paren) >>
          foo >>
          dynamic { |_, context| str(complement(context.captures[:paren])) }
    end
  end

  # Parser for a (possibly empty) quoted text; the text between the quotes is tagged +key+.
  # The closing quote must be the same character as the opening quote.
  def any_quoted(key = :text)
    scope do
      quote.capture(:quote) >>
          dynamic { |_, context| (str(context.captures[:quote]).absent? >> any).repeat(1) }.maybe.as(key) >>
          dynamic { |_, context| str(context.captures[:quote]) }
    end
  end

  # New instance of the parser class' nested +Transformer+ class.
  # Best effort: nil is returned when it cannot be created.
  def transformer
    begin
      self.class::Transformer.new
    rescue StandardError
      nil
    end
  end

end
113
+
114
+ end
115
+ end
116
+ end