libis-tools 0.9.65 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +5 -23
  3. data/lib/libis/tools.rb +0 -1
  4. data/lib/libis/tools/version.rb +1 -1
  5. data/libis-tools.gemspec +1 -1
  6. metadata +4 -51
  7. data/lib/libis/tools/metadata.rb +0 -25
  8. data/lib/libis/tools/metadata/dublin_core_record.rb +0 -116
  9. data/lib/libis/tools/metadata/field_format.rb +0 -121
  10. data/lib/libis/tools/metadata/fix_field.rb +0 -35
  11. data/lib/libis/tools/metadata/mapper.rb +0 -81
  12. data/lib/libis/tools/metadata/mappers/flandrica.rb +0 -76
  13. data/lib/libis/tools/metadata/mappers/kuleuven.rb +0 -1929
  14. data/lib/libis/tools/metadata/mappers/scope.rb +0 -46
  15. data/lib/libis/tools/metadata/marc21_record.rb +0 -51
  16. data/lib/libis/tools/metadata/marc_record.rb +0 -287
  17. data/lib/libis/tools/metadata/parser/basic_parser.rb +0 -120
  18. data/lib/libis/tools/metadata/parser/dublin_core_parser.rb +0 -37
  19. data/lib/libis/tools/metadata/parser/marc21_parser.rb +0 -207
  20. data/lib/libis/tools/metadata/parser/marc_format_parser.rb +0 -53
  21. data/lib/libis/tools/metadata/parser/marc_rules.rb +0 -36
  22. data/lib/libis/tools/metadata/parser/marc_select_parser.rb +0 -26
  23. data/lib/libis/tools/metadata/parser/patch.rb +0 -22
  24. data/lib/libis/tools/metadata/parser/subfield_criteria_parser.rb +0 -72
  25. data/lib/libis/tools/metadata/parsers.rb +0 -12
  26. data/lib/libis/tools/metadata/sharepoint_mapping.rb +0 -119
  27. data/lib/libis/tools/metadata/sharepoint_record.rb +0 -262
  28. data/lib/libis/tools/metadata/var_field.rb +0 -242
  29. data/spec/data/MetadataMapping.xlsx +0 -0
  30. data/spec/metadata/123456789.marc +0 -18
  31. data/spec/metadata/8389207.marc +0 -117
  32. data/spec/metadata/BE_942855_1927_4898_corrected.XML +0 -11
  33. data/spec/metadata/BE_942855_1927_4898_md.XML +0 -11
  34. data/spec/metadata/dublin_core_parser_spec.rb +0 -48
  35. data/spec/metadata/dublin_core_spec.rb +0 -81
  36. data/spec/metadata/marc21_parser_data.rb +0 -382
  37. data/spec/metadata/marc21_parser_spec.rb +0 -67
  38. data/spec/metadata/marc21_spec.rb +0 -178
  39. data/spec/metadata/metadata_mapper_spec.rb +0 -23
  40. data/spec/metadata/scope_mapper_spec.rb +0 -29
  41. data/test.rb +0 -61
@@ -1,46 +0,0 @@
1
- # encoding: utf-8
2
-
3
- require 'libis/tools/metadata/dublin_core_record'
4
- require 'libis/tools/assert'
5
-
6
module Libis
  module Tools
    module Metadata
      module Mappers
        # noinspection RubyResolve

        # Mixin for {::Libis::Tools::Metadata::DublinCoreRecord} that converts a DC record exported by Scope.
        module Scope

          # Main conversion method.
          #
          # Builds a new record from this record's XML. When that record has an isPartOf element, an isReferencedBy
          # node with the same content and an 'xsi:type' of 'dcterms:URI' is created and put in its place.
          #
          # @return [::Libis::Tools::Metadata::DublinCoreRecord]
          def to_dc
            assert(self.is_a?(Libis::Tools::Metadata::DublinCoreRecord))

            converted = Libis::Tools::Metadata::DublinCoreRecord.new(self.to_xml)

            # Nothing to rewrite when the record has no isPartOf element.
            return converted unless converted.isPartOf

            # Create the isReferencedBy node from the isPartOf content ...
            replacement = converted.add_node(
                'isReferencedBy',
                converted.isPartOf.content,
                nil,
                'xsi:type' => 'dcterms:URI'
            )

            # ... and substitute it for the isPartOf node.
            converted.isPartOf.replace replacement

            converted
          end

        end

      end
    end
  end
end
@@ -1,51 +0,0 @@
1
- # coding: utf-8
2
-
3
- require 'cgi'
4
-
5
- require_relative 'marc_record'
6
-
7
module Libis
  module Tools
    module Metadata

      # Supplies the private method 'get_all_records', which the parent class calls but does not define, to
      # accommodate the MARC-XML format.
      class Marc21Record < Libis::Tools::Metadata::MarcRecord

        private

        # Rebuilds the tag => fields table from the leader, controlfield and datafield elements found below the node.
        #
        # @return [Hash] the refreshed table, keyed by tag
        def get_all_records
          # Tags shorter than three characters are converted to a zero-padded three digit number.
          pad_tag = ->(raw) { raw.size < 3 ? '%03d' % raw.to_i : raw }

          @all_records.clear

          @node.xpath('.//leader').each do |leader|
            @all_records['LDR'] << FixField.new('LDR', leader.content)
          end

          @node.xpath('.//controlfield').each do |control|
            tag = pad_tag.call(control['tag'])
            @all_records[tag] << FixField.new(tag, control.content)
          end

          @node.xpath('.//datafield').each do |data|
            tag = pad_tag.call(data['tag'])
            varfield = VarField.new(tag, data['ind1'].to_s, data['ind2'].to_s)
            data.xpath('.//subfield').each do |sub|
              varfield.add_subfield(sub['code'], sub.content)
            end
            @all_records[tag] << varfield
          end

          @all_records
        end

      end

    end
  end
end
@@ -1,287 +0,0 @@
1
- # coding: utf-8
2
-
3
- require 'set'
4
- require 'cgi'
5
-
6
- require 'libis/tools/xml_document'
7
- require 'libis/tools/assert'
8
-
9
- require_relative 'fix_field'
10
- require_relative 'var_field'
11
- require_relative 'field_format'
12
-
13
module Libis
  module Tools
    module Metadata

      # noinspection RubyTooManyMethodsInspection

      # Base class for reading MARC based records.
      #
      # For indicator selection: '#' or '' (empty) is wildcard; '_' or ' ' (space) is blank.
      #
      # This class calls a private method +get_all_records+ that it does not define itself; a subclass has to supply
      # it and return the Hash described at {#all}.
      class MarcRecord

        # Create a new MarcRecord object
        #
        # Note that namespaces are removed from the whole document the node belongs to.
        #
        # @param [XML node] xml_node XML node from Nokogiri or XmlDocument that contains child nodes with the data for
        #   one MARC record.
        def initialize(xml_node)
          @node = xml_node
          @node.document.remove_namespaces!
          @all_records = Hash.new { |h, k| h[k] = Array.new }
        end

        # Access to the XML node that was supplied to the constructor
        # @return [XML node]
        def to_raw
          @node
        end

        # Returns the internal data structure (a Hash) with all the MARC data.
        #
        # The internal structure is a Hash with the tag as key and as value an Array of either FixField or VarField
        # instances. The data is collected on first use; as long as the Hash is empty every call collects it again.
        #
        # @return [Hash] internal data structure
        def all
          return @all_records unless @all_records.empty?
          @all_records = get_all_records
        end

        # Iterates over all the MARC fields.
        #
        # If a block is supplied it will be called for each field in the MARC record. The supplied argument will be the
        # FixField or VarField instance for each field.
        #
        # @return [Array] The list of the field data or return values for each block call.
        def each
          all.values.flatten.map do |field|
            block_given? ? yield(field) : field
          end
        end

        # Get all fields matching search criteria.
        #
        # A block with one parameter can be supplied when calling this method. Each time a match is found, the block
        # will be called with the field data as argument and the return value of the block will be added to the method's
        # return value. This could for example be used to narrow the selection of the fields:
        #
        #    # Only select 700 tags where $4 subfield contains 'abc', 'def' or 'xyz'
        #    record.all_tags('700') { |v| v.subfield['4'] =~ /^(abc|def|xyz)$/ ? v : nil }.compact
        #
        # @param [String] tag Tag selection string. Tag name with indicators, '#' for wildcard, '_' for blank. If an
        #   extra subfield name is added, a result will be created for each instance found of that subfield.
        # @param [String] subfields Subfield specification. See FieldFormat class for more info; ignored for controlfields.
        # @param [Proc] select_block block that will be executed once for each field found. The block takes one argument
        #   (the field) and should return true or false. True selects the field, false rejects it.
        # @return [Array] If a block was supplied to the method call, the array will contain the result of the block
        #   for each tag found. Otherwise the array will just contain the data for each matching tag.
        def all_tags(tag, subfields = '', select_block = Proc.new { |_| true })
          # A selector starting with three digits is split positionally: tag, ind1, ind2, subfield code.
          # Anything else (e.g. 'LDR') is used as the tag as-is.
          t, ind1, ind2, subfield = tag =~ /^\d{3}/ ? [tag[0..2], tag[3], tag[4], tag[5]] : [tag, nil, nil, nil]
          result = get_records(t, ind1, ind2, subfield, subfields, &select_block)
          return result unless block_given?
          result.map { |record| yield record }
        end

        alias_method :each_tag, :all_tags

        # Get all fields matching search criteria.
        # As {#all_tags} but without subfield criteria.
        # @param [String] tag Tag selection string. Tag name with indicators, '#' for wildcard, '_' for blank. If an
        #   extra subfield name is added, a result will be created for each instance found of that subfield.
        # @param [Proc] select_block block that will be executed once for each field found. The block takes one argument
        #   (the field) and should return true or false. True selects the field, false rejects it.
        # @return [Array] If a block was supplied to the method call, the array will contain the result of the block
        #   for each tag found. Otherwise the array will just contain the data for each matching tag.
        def select_fields(tag, select_block = nil, &block)
          all_tags(tag, nil, select_block, &block)
        end

        # Find the first tag matching the criteria.
        #
        # If a block is supplied, it will be called with the found field data. The return value will be whatever the
        # block returns. If no block is supplied, the field data will be returned. If nothing was found, the return
        # value is nil.
        #
        # @param [String] tag Tag selection string. Tag name with indicators, '#' for wildcard, '_' for blank.
        # @param [String] subfields Subfield specification. See FieldFormat class for more info; ignored for controlfields.
        # @return [Object] nil if nothing found; field data or whatever block returns.
        def first_tag(tag, subfields = '')
          result = all_tags(tag, subfields).first
          return nil unless result
          return result unless block_given?
          yield result
        end

        # Find all fields matching the criteria.
        #
        # Without a block the flattened, nil-free list of subfield values is returned. With a block, the block is
        # called for each value and the method returns whether any value was found.
        #
        # @param (see #first_tag)
        # @return [Array, Boolean] list of values, or true/false when a block was given
        def all_fields(tag, subfields)
          r = all_tags(tag, subfields).collect { |t| t.subfields_array(subfields) }.flatten.compact
          return r unless block_given?
          r.each { |field| yield field }
          r.size > 0
        end

        # Find the first field matching the criteria
        #
        # Without a block the first value (or nil) is returned. With a block, the block is called with the first
        # value and the method returns true; false is returned when there is no value.
        #
        # @param (see #all_fields)
        # @return [Object, Boolean] first value, or true/false when a block was given
        def first_field(tag, subfields)
          result = all_fields(tag, subfields).first
          return result unless block_given?
          return false unless result
          yield result
          true
        end

        # Perform action on each field found. Code block required.
        # @param (see #all_fields)
        def each_field(tag, subfields)
          all_fields(tag, subfields).each do |field|
            yield field
          end
        end

        # Dump content to string.
        # @return [String] concatenation of the dump of every field
        def marc_dump
          all.values.flatten.map { |record| record.dump }.join
        end

        # Save the current MARC record to file.
        #
        # When no filename is given nothing is written and the XmlDocument wrapping the node is returned instead.
        #
        # @param [String] filename name of the file
        def save(filename)
          doc = ::Libis::Tools::XmlDocument.new
          doc.root = @node

          return doc unless filename

          doc.save filename, save_with: (::Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS |
              ::Nokogiri::XML::Node::SaveOptions::AS_XML |
              ::Nokogiri::XML::Node::SaveOptions::FORMAT
          )
        end

        # Load XML document from file and create a new {MarcRecord} for it.
        # @param [String] filename name of XML Marc file
        def self.load(filename)
          doc = ::Libis::Tools::XmlDocument.open(filename)
          self.new(doc.root)
        end

        # Load XML document from stream and create a new {MarcRecord} for it.
        # @param [IO,String] io input stream
        def self.read(io)
          io = StringIO.new(io) if io.is_a? String
          doc = ::Libis::Tools::XmlDocument.parse(io)
          self.new(doc.root)
        end

        # Dump Marc record in Aleph Sequential format
        #
        # Every line starts with the content of the first 001 field, left-padded with zeros to 9 characters (empty
        # content when there is no 001 field). All fixed fields are written first, followed by the variable fields;
        # subfield values are HTML-unescaped.
        #
        # @return [String] Aleph sequential output
        def to_aseq
          id_field = first_tag('001')
          # rjust is used because the '0' flag of format is not applied to '%s' conversions.
          doc_number = (id_field ? id_field.datas : '').to_s.rjust(9, '0')

          # 'all' is a Hash of arrays, so the fields themselves have to be collected before filtering on type.
          fields = all.values.flatten

          lines = fields.select { |f| f.is_a? Libis::Tools::Metadata::FixField }.map do |f|
            "#{doc_number} #{f.tag} L #{f.datas}\n"
          end

          fields.select { |f| f.is_a? Libis::Tools::Metadata::VarField }.each do |f|
            data = f.keys.map do |k|
              f.subfield_array(k).map { |v| "$$#{k}#{CGI::unescapeHTML(v)}" }.join
            end.join
            lines << "#{doc_number} #{f.tag}#{f.ind1}#{f.ind2} L #{data}\n"
          end

          lines.join
        end

        protected

        # The helpers below all work the same way: a trailing Hash in +parts+ is taken as options and merged over
        # the helper's defaults, and the remaining parts are rendered to a String through FieldFormat.

        # Format parts without extra default options.
        def element(*parts)
          opts = options(parts)
          field_format(opts, *parts)
        end

        # Format parts with default option join: ' '.
        def list_s(*parts)
          opts = options(parts, join: ' ')
          field_format(opts, *parts)
        end

        # Format parts with default option join: ', '.
        def list_c(*parts)
          opts = options(parts, join: ', ')
          field_format(opts, *parts)
        end

        # Format parts with default option join: ' - '.
        def list_d(*parts)
          opts = options(parts, join: ' - ')
          field_format(opts, *parts)
        end

        # Format parts with default option join: '; '.
        def repeat(*parts)
          opts = options(parts, join: '; ')
          field_format(opts, *parts)
        end

        # Format parts with default option fix: '()'.
        def opt_r(*parts)
          opts = options(parts, fix: '()')
          field_format(opts, *parts)
        end

        # Format parts with default option fix: '[]'.
        def opt_s(*parts)
          opts = options(parts, fix: '[]')
          field_format(opts, *parts)
        end

        # Build an ODIS link from the first two characters of the lowercased group, the id and the label.
        # @return [String]
        def odis_link(group, id, label)
          "http://www.odis.be/lnk/#{group.downcase[0, 2]}_#{id}\##{label}"
        end

        private

        # Removes a trailing Hash from +args+ (if any) and merges it over +default+.
        # @return [Hash]
        def options(args, default = {})
          default.merge(args.last.is_a?(::Hash) ? args.pop : {})
        end

        # Renders +parts+ through FieldFormat, using +default_options+ as default options.
        # @return [String]
        def field_format(default_options, *parts)
          Libis::Tools::Metadata::FieldFormat.new(*parts).add_default_options(default_options).to_s
        end

        # Selects the fields stored under +tag+.
        #
        # Fixed fields always match; variable fields have to match both indicators (an empty indicator matches
        # anything) and the +subfields+ criteria. When a block is given a field is only kept if the block returns a
        # truthy value for it.
        #
        # When +subfield+ is given, fixed fields are dropped and every remaining field is replaced by one copy per
        # instance of that subfield; each copy holds only that single instance next to all other subfields.
        #
        # @return [Array] matching fields
        def get_records(tag, ind1 = '', ind2 = '', subfield = nil, subfields = '', &block)
          ind1 = normalize_indicator(ind1)
          ind2 = normalize_indicator(ind2)
          subfields ||= ''

          found = all[tag].select do |v|
            result = v.is_a?(Libis::Tools::Metadata::FixField) ||
                ((ind1.empty? || v.ind1 == ind1) &&
                    (ind2.empty? || v.ind2 == ind2) &&
                    v.match(subfields)
                )
            result &&= block.call(v) if block
            result
          end

          return found unless subfield

          # duplicate tags for subfield instances; fixed fields carry no subfields and are skipped
          found.reject { |field| field.is_a? Libis::Tools::Metadata::FixField }.
              map { |field| split_on_subfield(field, subfield) }.flatten
        end

        # Turns an indicator selector into its match value without touching the caller's string:
        # nil becomes '', '_' becomes a blank and the wildcard '#' is removed.
        # @return [String]
        def normalize_indicator(indicator)
          (indicator || '').tr('_', ' ').delete('#')
        end

        # Creates one independent copy of +field+ for each instance of +subfield+. The source field is left as-is.
        # @return [Array] the copies; empty when the field has no such subfield
        def split_on_subfield(field, subfield)
          # fetch instead of [] so a missing subfield does not add an empty entry to the source field
          field.subfield_data.fetch(subfield, []).map do |instance|
            copy = field.class.new(field.tag, field.ind1, field.ind2)
            field.subfield_data.each do |code, values|
              (code == subfield ? [instance] : values).each { |value| copy.add_subfield(code, value) }
            end
            copy
          end
        end

      end
    end
  end
end
@@ -1,120 +0,0 @@
1
- # encoding: utf-8
2
-
3
- require 'parslet'
4
- require 'parslet/convenience'
5
-
6
module Libis
  module Tools
    module Metadata
      # noinspection RubyResolve

      # New style parsers and converters for metadata. New, not finished and untested.
      #
      # Collection of basic Parslet rules and grouping helpers.
      class BasicParser < Parslet::Parser
        # whitespace
        rule(:space) { match('\s') }
        rule(:space?) { space.maybe }
        rule(:spaces) { space.repeat(1) }
        rule(:spaces?) { space.repeat }

        # digits
        rule(:number) { match('[0-9]') }
        rule(:number?) { number.maybe }
        rule(:integer) { number.repeat(1) }

        # letters (case insensitive)
        rule(:character) { match(/[a-z]/i) }
        rule(:character?) { character.maybe }
        rule(:characters) { character.repeat(1) }

        # word characters
        rule(:wordchar) { match('\w') }

        # names: a letter or underscore followed by word characters, at least once
        rule(:name_string) { ((character | underscore) >> wordchar.repeat).repeat(1) }

        # free text: anything that is not a grouping character
        rule(:other) { not_paren }
        rule(:text) { other.repeat(1) }
        rule(:text?) { text.maybe }

        # special characters
        rule(:minus) { str('-') }
        rule(:colon) { str(':') }
        rule(:semicolon) { str(';') }
        rule(:underscore) { str('_') }
        rule(:hashtag) { str('#') }
        rule(:dollar) { str('$') }
        rule(:star) { str('*') }

        # grouping characters; quotes count as both opening and closing
        rule(:paren) { lparen | rparen }
        rule(:lparen) { lrparen | lsparen | lcparen | squote | dquote }
        rule(:rparen) { rrparen | rsparen | rcparen | squote | dquote }

        rule(:not_paren) { paren.absent? >> any }
        rule(:not_lparen) { lrparen.absent? >> lsparen.absent? >> lcparen.absent? >> squote.absent? >> dquote.absent? >> any }
        rule(:not_rparen) { rrparen.absent? >> rsparen.absent? >> rcparen.absent? >> squote.absent? >> dquote.absent? >> any }

        rule(:lrparen) { str('(') }
        rule(:lsparen) { str('[') }
        rule(:lcparen) { str('{') }
        rule(:rrparen) { str(')') }
        rule(:rsparen) { str(']') }
        rule(:rcparen) { str('}') }

        # quotes
        rule(:squote) { str("'") }
        rule(:dquote) { str('"') }
        rule(:quote) { squote | dquote }

        rule(:not_squote) { squote.absent? >> any }
        rule(:not_dquote) { dquote.absent? >> any }
        rule(:not_quote) { quote.absent? >> any }

        # Closing counterpart of an opening grouping character; every other value is returned unchanged.
        #
        # A case expression is used on purpose: the value may be a parser capture rather than a plain String
        # (presumably a Parslet slice - verify), and those compare with == but would not work as Hash keys.
        def complement(char)
          case char
          when '(' then ')'
          when '{' then '}'
          when '[' then ']'
          else char
          end
        end

        # Wraps +foo+ between an opening group character and its matching closer. The opener is tagged :lparen and
        # the closer :rparen in the result.
        #
        # @param foo parser atom for the enclosed content
        # @param left_paren parser atom for the opening character; defaults to the lparen rule
        def grouped(foo, left_paren = lparen)
          scope do
            opening = left_paren.capture(:paren).as(:lparen)
            # The closer is only known at parse time: it is derived from the captured opener.
            closing = dynamic { |_source, context| str(complement(context.captures[:paren])) }.as(:rparen)
            opening >> foo >> closing
          end
        end

        # Same as {#grouped} but without tagging the opening and closing characters.
        #
        # @param foo parser atom for the enclosed content
        # @param left_paren parser atom for the opening character; defaults to the lparen rule
        def grouped_anonymous(foo, left_paren = lparen)
          scope do
            opening = left_paren.capture(:paren)
            closing = dynamic { |_source, context| str(complement(context.captures[:paren])) }
            opening >> foo >> closing
          end
        end

        # Matches optional text between two identical quote characters and tags that text with +key+.
        #
        # @param [Symbol] key name for the quoted text in the result
        def any_quoted(key = :text)
          scope do
            opening = quote.capture(:quote)
            # Content is everything up to the next occurrence of the quote character that opened the text.
            content = dynamic { |_source, context| (str(context.captures[:quote]).absent? >> any).repeat(1) }.maybe.as(key)
            closing = dynamic { |_source, context| str(context.captures[:quote]) }
            opening >> content >> closing
          end
        end

        # New instance of the Transformer class nested in the parser class; nil when that fails for any
        # StandardError (for instance when no such class exists).
        def transformer
          begin
            self.class::Transformer.new
          rescue StandardError
            nil
          end
        end

      end

    end
  end
end
119
-
120
-