libis-tools 0.9.65 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41)
  1. checksums.yaml +4 -4
  2. data/README.md +5 -23
  3. data/lib/libis/tools.rb +0 -1
  4. data/lib/libis/tools/version.rb +1 -1
  5. data/libis-tools.gemspec +1 -1
  6. metadata +4 -51
  7. data/lib/libis/tools/metadata.rb +0 -25
  8. data/lib/libis/tools/metadata/dublin_core_record.rb +0 -116
  9. data/lib/libis/tools/metadata/field_format.rb +0 -121
  10. data/lib/libis/tools/metadata/fix_field.rb +0 -35
  11. data/lib/libis/tools/metadata/mapper.rb +0 -81
  12. data/lib/libis/tools/metadata/mappers/flandrica.rb +0 -76
  13. data/lib/libis/tools/metadata/mappers/kuleuven.rb +0 -1929
  14. data/lib/libis/tools/metadata/mappers/scope.rb +0 -46
  15. data/lib/libis/tools/metadata/marc21_record.rb +0 -51
  16. data/lib/libis/tools/metadata/marc_record.rb +0 -287
  17. data/lib/libis/tools/metadata/parser/basic_parser.rb +0 -120
  18. data/lib/libis/tools/metadata/parser/dublin_core_parser.rb +0 -37
  19. data/lib/libis/tools/metadata/parser/marc21_parser.rb +0 -207
  20. data/lib/libis/tools/metadata/parser/marc_format_parser.rb +0 -53
  21. data/lib/libis/tools/metadata/parser/marc_rules.rb +0 -36
  22. data/lib/libis/tools/metadata/parser/marc_select_parser.rb +0 -26
  23. data/lib/libis/tools/metadata/parser/patch.rb +0 -22
  24. data/lib/libis/tools/metadata/parser/subfield_criteria_parser.rb +0 -72
  25. data/lib/libis/tools/metadata/parsers.rb +0 -12
  26. data/lib/libis/tools/metadata/sharepoint_mapping.rb +0 -119
  27. data/lib/libis/tools/metadata/sharepoint_record.rb +0 -262
  28. data/lib/libis/tools/metadata/var_field.rb +0 -242
  29. data/spec/data/MetadataMapping.xlsx +0 -0
  30. data/spec/metadata/123456789.marc +0 -18
  31. data/spec/metadata/8389207.marc +0 -117
  32. data/spec/metadata/BE_942855_1927_4898_corrected.XML +0 -11
  33. data/spec/metadata/BE_942855_1927_4898_md.XML +0 -11
  34. data/spec/metadata/dublin_core_parser_spec.rb +0 -48
  35. data/spec/metadata/dublin_core_spec.rb +0 -81
  36. data/spec/metadata/marc21_parser_data.rb +0 -382
  37. data/spec/metadata/marc21_parser_spec.rb +0 -67
  38. data/spec/metadata/marc21_spec.rb +0 -178
  39. data/spec/metadata/metadata_mapper_spec.rb +0 -23
  40. data/spec/metadata/scope_mapper_spec.rb +0 -29
  41. data/test.rb +0 -61
@@ -1,46 +0,0 @@
1
- # encoding: utf-8
2
-
3
- require 'libis/tools/metadata/dublin_core_record'
4
- require 'libis/tools/assert'
5
-
6
- module Libis
7
- module Tools
8
- module Metadata
9
- module Mappers
10
- # noinspection RubyResolve
11
-
12
- # Mixin for {::Libis::Tools::Metadata::DublinCoreRecord} to enable conversion of the Scope-exported DC record.
13
- module Scope
14
-
15
- # Main conversion method.
16
- # @return [::Libis::Tools::Metadata::DublinCoreRecord]
17
- def to_dc
18
- assert(self.is_a? Libis::Tools::Metadata::DublinCoreRecord)
19
-
20
- doc = Libis::Tools::Metadata::DublinCoreRecord.new(self.to_xml)
21
-
22
- if doc.isPartOf
23
-
24
- # create new node for isReferencedBy
25
- new_node = doc.add_node(
26
- 'isReferencedBy',
27
- doc.isPartOf.content,
28
- nil,
29
- 'xsi:type' => 'dcterms:URI'
30
- )
31
-
32
- # Replace isPartOf with isReferencedBy
33
- doc.isPartOf.replace new_node
34
-
35
- end
36
-
37
- doc
38
-
39
- end
40
-
41
- end
42
-
43
- end
44
- end
45
- end
46
- end
@@ -1,51 +0,0 @@
1
- # coding: utf-8
2
-
3
- require 'cgi'
4
-
5
- require_relative 'marc_record'
6
-
7
- module Libis
8
- module Tools
9
- module Metadata
10
-
11
- # This class implements the missing private method 'get_all_records' to accommodate the MARC-XML format.
12
- class Marc21Record < Libis::Tools::Metadata::MarcRecord
13
-
14
- private
15
-
16
- def get_all_records
17
-
18
- @all_records.clear
19
-
20
- @node.xpath('.//leader').each { |f|
21
- @all_records['LDR'] << FixField.new('LDR', f.content)
22
- }
23
-
24
- @node.xpath('.//controlfield').each { |f|
25
- tag = f['tag']
26
- tag = '%03d' % tag.to_i if tag.size < 3
27
- @all_records[tag] << FixField.new(tag, f.content)
28
- }
29
-
30
- @node.xpath('.//datafield').each { |v|
31
-
32
- tag = v['tag']
33
- tag = '%03d' % tag.to_i if tag.size < 3
34
-
35
- varfield = VarField.new(tag, v['ind1'].to_s, v['ind2'].to_s)
36
-
37
- v.xpath('.//subfield').each { |s| varfield.add_subfield(s['code'], s.content) }
38
-
39
- @all_records[tag] << varfield
40
-
41
- }
42
-
43
- @all_records
44
-
45
- end
46
-
47
- end
48
-
49
- end
50
- end
51
- end
@@ -1,287 +0,0 @@
1
- # coding: utf-8
2
-
3
- require 'set'
4
- require 'cgi'
5
-
6
- require 'libis/tools/xml_document'
7
- require 'libis/tools/assert'
8
-
9
- require_relative 'fix_field'
10
- require_relative 'var_field'
11
- require_relative 'field_format'
12
-
13
- module Libis
14
- module Tools
15
- module Metadata
16
-
17
- # noinspection RubyTooManyMethodsInspection
18
-
19
- # Base class for reading MARC based records.
20
- #
21
- # For indicator selection: '#' or '' (empty) is wildcard; '_' or ' ' (space) is blank.
22
- class MarcRecord
23
-
24
- # Create a new MarcRecord object
25
- #
26
- # @param [XML node] xml_node XML node from Nokogiri or XmlDocument that contains child nodes with the data for
27
- # one MARC record.
28
- def initialize(xml_node)
29
- @node = xml_node
30
- @node.document.remove_namespaces!
31
- @all_records = Hash.new { |h, k| h[k] = Array.new }
32
- end
33
-
34
- # Access to the XML node that was supplied to the constructor
35
- # @return [XML node]
36
- def to_raw
37
- @node
38
- end
39
-
40
- # Returns the internal data structure (a Hash) with all the MARC data.
41
- #
42
- # The internal structure is a Hash with the tag as key and as value an Array of either FixField or VarField
43
- # instances.
44
- #
45
- # @return [Hash] internal data structure
46
- def all
47
- return @all_records unless @all_records.empty?
48
- @all_records = get_all_records
49
- end
50
-
51
- # Iterates over all the MARC fields.
52
- #
53
- # If a block is supplied it will be called for each field in the MARC record. The supplied argument will be the
54
- # FixField or VarField instance for each field.
55
- #
56
- # @return [Array] The list of the field data or return values for each block call.
57
- def each
58
- all.map { |_, field_array| field_array }.flatten.map do |field|
59
- block_given? ? yield(field) : field
60
- end
61
- end
62
-
63
- # Get all fields matching search criteria.
64
- #
65
- # A block with one parameter can be supplied when calling this method. Each time a match is found, the block
66
- # will be called with the field data as argument and the return value of the block will be added to the method's
67
- # return value. This could for example be used to narrow the selection of the fields:
68
- #
69
- # # Only select 700 tags where $4 subfield contains 'abc', 'def' or 'xyz'
70
- # record.all_tags('700') { |v| v.subfield['4'] =~ /^(abc|def|xyz)$/ ? v : nil }.compact
71
- #
72
- # @param [String] tag Tag selection string. Tag name with indicators, '#' for wildcard, '_' for blank. If an
73
- # extra subfield name is added, a result will be created for each instance found of that subfield.
74
- # @param [String] subfields Subfield specification. See FieldFormat class for more info; ignored for controlfields.
75
- # @param [Proc] select_block block that will be executed once for each field found. The block takes one argument
76
- # (the field) and should return true or false. True selects the field, false rejects it.
77
- # @return [Array] If a block was supplied to the method call, the array will contain the result of the block
78
- # for each tag found. Otherwise the array will just contain the data for each matching tag.
79
- def all_tags(tag, subfields = '', select_block = Proc.new { |_| true})
80
- t, ind1, ind2, subfield = tag =~ /^\d{3}/ ? [tag[0..2], tag[3], tag[4], tag[5]] : [tag, nil, nil, nil]
81
- result = get_records(t, ind1, ind2, subfield, subfields, &select_block)
82
- return result unless block_given?
83
- result.map { |record| yield record }
84
- end
85
-
86
- alias_method :each_tag, :all_tags
87
-
88
- # Get all fields matching search criteria.
89
- # As {#all_tags} but without subfield criteria.
90
- # @param [String] tag Tag selection string. Tag name with indicators, '#' for wildcard, '_' for blank. If an
91
- # extra subfield name is added, a result will be created for each instance found of that subfield.
92
- # @param [Proc] select_block block that will be executed once for each field found. The block takes one argument
93
- # (the field) and should return true or false. True selects the field, false rejects it.
94
- # @return [Array] If a block was supplied to the method call, the array will contain the result of the block
95
- # for each tag found. Otherwise the array will just contain the data for each matching tag.
96
- def select_fields(tag, select_block = nil, &block)
97
- all_tags(tag, nil, select_block, &block)
98
- end
99
-
100
- # Find the first tag matching the criteria.
101
- #
102
- # If a block is supplied, it will be called with the found field data. The return value will be whatever the
103
- # block returns. If no block is supplied, the field data will be returned. If nothing was found, the return
104
- # value is nil.
105
- #
106
- # @param [String] tag Tag selection string. Tag name with indicators, '#' for wildcard, '_' for blank.
107
- # @param [String] subfields Subfield specification. See FieldFormat class for more info; ignored for controlfields.
108
- # @return [Object] nil if nothing found; field data or whatever block returns.
109
- def first_tag(tag, subfields = '')
110
- result = all_tags(tag, subfields).first
111
- return nil unless result
112
- return result unless block_given?
113
- yield result
114
- end
115
-
116
- # Find all fields matching the criteria.
117
- # (see #first_tag)
118
- # @param (see #first_tag)
119
- def all_fields(tag, subfields)
120
- r = all_tags(tag, subfields).collect { |t| t.subfields_array(subfields) }.flatten.compact
121
- return r unless block_given?
122
- r.map { |field| yield field }
123
- r.size > 0
124
- end
125
-
126
- # Find the first field matching the criteria
127
- # (see #all_fields)
128
- # @param (see #all_fields)
129
- def first_field(tag, subfields)
130
- result = all_fields(tag, subfields).first
131
- return result unless block_given?
132
- return false unless result
133
- yield result
134
- true
135
- end
136
-
137
- # Perform action on each field found. Code block required.
138
- # @param (see #all_fields)
139
- def each_field(tag, subfields)
140
- all_fields(tag, subfields).each do |field|
141
- yield field
142
- end
143
- end
144
-
145
- # Dump content to string.
146
- def marc_dump
147
- all.values.flatten.each_with_object([]) { |record, m| m << record.dump }.join
148
- end
149
-
150
- # Save the current MARC record to file.
151
- # @param [String] filename name of the file
152
- def save(filename)
153
- doc = ::Libis::Tools::XmlDocument.new
154
- doc.root = @node
155
-
156
- return doc unless filename
157
-
158
- doc.save filename, save_with: (::Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS |
159
- ::Nokogiri::XML::Node::SaveOptions::AS_XML |
160
- ::Nokogiri::XML::Node::SaveOptions::FORMAT
161
- )
162
- end
163
-
164
- # Load XML document from file and create a new {MarcRecord} for it.
165
- # @param [String] filename name of XML Marc file
166
- def self.load(filename)
167
- doc = ::Libis::Tools::XmlDocument.open(filename)
168
- self.new(doc.root)
169
- end
170
-
171
- # Load XML document from stream and create a new {MarcRecord} for it.
172
- # @param [IO,String] io input stream
173
- def self.read(io)
174
- io = StringIO.new(io) if io.is_a? String
175
- doc = ::Libis::Tools::XmlDocument.parse(io)
176
- self.new(doc.root)
177
- end
178
-
179
- # Dump Marc record in Aleph Sequential format
180
- # @return [String] Aleph sequential output
181
- def to_aseq
182
- record = ''
183
- doc_number = tag('001').datas
184
-
185
- all.select { |t| t.is_a? Libis::Tools::Metadata::FixField }.each { |t| record += "#{format('%09s', doc_number)} #{t.tag} L #{t.datas}\n" }
186
- all.select { |t| t.is_a? Libis::Tools::Metadata::VarField }.each { |t|
187
- record += "#{format('%09s', doc_number)} #{t.tag}#{t.ind1}#{t.ind2} L "
188
- t.keys.each { |k|
189
- t.subfield_array(k).each { |f|
190
- record += "$$#{k}#{CGI::unescapeHTML(f)}"
191
- }
192
- }
193
- record += "\n"
194
- }
195
-
196
- record
197
- end
198
-
199
- protected
200
-
201
- def element(*parts)
202
- opts = options parts
203
- field_format(opts, *parts)
204
- end
205
-
206
- def list_s(*parts)
207
- opts = options parts, join: ' '
208
- field_format(opts, *parts)
209
- end
210
-
211
- def list_c(*parts)
212
- opts = options parts, join: ', '
213
- field_format(opts, *parts)
214
- end
215
-
216
- def list_d(*parts)
217
- opts = options parts, join: ' - '
218
- field_format(opts, *parts)
219
- end
220
-
221
- def repeat(*parts)
222
- opts = options parts, join: '; '
223
- field_format(opts, *parts)
224
- end
225
-
226
- def opt_r(*parts)
227
- opts = options parts, fix: '()'
228
- field_format(opts, *parts)
229
- end
230
-
231
- def opt_s(*parts)
232
- opts = options parts, fix: '[]'
233
- field_format(opts, *parts)
234
- end
235
-
236
- def odis_link(group, id, label)
237
- "http://www.odis.be/lnk/#{group.downcase[0, 2]}_#{id}\##{label}"
238
- end
239
-
240
- private
241
-
242
- def options(args, default = {})
243
- default.merge(args.last.is_a?(::Hash) ? args.pop : {})
244
- end
245
-
246
- def field_format(default_options, *parts)
247
- Libis::Tools::Metadata::FieldFormat.new(*parts).add_default_options(default_options).to_s
248
- end
249
-
250
- def get_records(tag, ind1 = '', ind2 = '', subfield = nil, subfields = '', &block)
251
-
252
- ind1 ||= ''
253
- ind2 ||= ''
254
- subfields ||= ''
255
-
256
- ind1.tr!('_', ' ')
257
- ind1.tr!('#', '')
258
-
259
- ind2.tr!('_', ' ')
260
- ind2.tr!('#', '')
261
-
262
- found = all[tag].select do |v|
263
- result = v.is_a?(Libis::Tools::Metadata::FixField) ||
264
- ((ind1.empty? or v.ind1 == ind1) &&
265
- (ind2.empty? or v.ind2 == ind2) &&
266
- v.match(subfields)
267
- )
268
- result &&= block.call(v) if block
269
- result
270
- end
271
-
272
- return found unless subfield
273
-
274
- # duplicate tags for subfield instances
275
- found.map do |field|
276
- next unless field.is_a? Libis::Tools::Metadata::FixField
277
- field.subfield_data[subfield].map do |sfield|
278
- field.dup.subfield_data[subfield] = [sfield]
279
- end
280
- end.compact.flatten
281
-
282
- end
283
-
284
- end
285
- end
286
- end
287
- end
@@ -1,120 +0,0 @@
1
- # encoding: utf-8
2
-
3
- require 'parslet'
4
- require 'parslet/convenience'
5
-
6
- module Libis
7
- module Tools
8
- module Metadata
9
- # noinspection RubyResolve
10
-
11
- # New-style parsers and converters for metadata. Unfinished and untested.
12
- class BasicParser < Parslet::Parser
13
- # space
14
- rule(:space) { match('\s') }
15
- rule(:space?) { space.maybe }
16
- rule(:spaces) { space.repeat(1) }
17
- rule(:spaces?) { space.repeat }
18
-
19
- # numbers
20
- rule(:number) { match('[0-9]') }
21
- rule(:number?) { number.maybe }
22
- rule(:integer) { number.repeat(1) }
23
-
24
- # chars
25
- rule(:character) { match(/[a-z]/i) }
26
- rule(:character?) { character.maybe }
27
- rule(:characters) { character.repeat(1) }
28
-
29
- # word
30
- rule(:wordchar) { match('\w') }
31
-
32
- # name
33
- rule(:name_string) { ((character | underscore) >> wordchar.repeat).repeat(1) }
34
-
35
- # text
36
- rule(:other) { not_paren }
37
- rule(:text) { other.repeat(1) }
38
- rule(:text?) { text.maybe }
39
-
40
- # special chars
41
- rule(:minus) { str('-') }
42
- rule(:colon) { str(':') }
43
- rule(:semicolon) { str(';') }
44
- rule(:underscore) { str('_') }
45
- rule(:hashtag) { str('#') }
46
- rule(:dollar) { str('$') }
47
- rule(:star) { str('*') }
48
-
49
- # grouping
50
- rule(:paren) { lparen | rparen }
51
- rule(:lparen) { lrparen | lsparen | lcparen | squote | dquote }
52
- rule(:rparen) { rrparen | rsparen | rcparen | squote | dquote }
53
-
54
- rule(:not_paren) { paren.absent? >> any }
55
- rule(:not_lparen) { lrparen.absent? >> lsparen.absent? >> lcparen.absent? >> squote.absent? >> dquote.absent? >> any }
56
- rule(:not_rparen) { rrparen.absent? >> rsparen.absent? >> rcparen.absent? >> squote.absent? >> dquote.absent? >> any }
57
-
58
- rule(:lrparen) { str('(') }
59
- rule(:lsparen) { str('[') }
60
- rule(:lcparen) { str('{') }
61
- rule(:rrparen) { str(')') }
62
- rule(:rsparen) { str(']') }
63
- rule(:rcparen) { str('}') }
64
-
65
- rule(:squote) { str("'") }
66
- rule(:dquote) { str('"') }
67
- rule(:quote) { squote | dquote }
68
-
69
- rule(:not_squote) { squote.absent? >> any }
70
- rule(:not_dquote) { dquote.absent? >> any }
71
- rule(:not_quote) { quote.absent? >> any }
72
-
73
- def complement(char)
74
- case char
75
- when '('
76
- ')'
77
- when '{'
78
- '}'
79
- when '['
80
- ']'
81
- else
82
- char
83
- end
84
- end
85
-
86
- def grouped(foo, left_paren = lparen)
87
- scope {
88
- left_paren.capture(:paren).as(:lparen) >>
89
- foo >>
90
- dynamic { |_, c| str(complement(c.captures[:paren])) }.as(:rparen)
91
- }
92
- end
93
-
94
- def grouped_anonymous(foo, left_paren = lparen)
95
- scope {
96
- left_paren.capture(:paren) >>
97
- foo >>
98
- dynamic { |_, c| str(complement(c.captures[:paren])) }
99
- }
100
- end
101
-
102
- def any_quoted(key = :text)
103
- scope {
104
- quote.capture(:quote) >>
105
- dynamic { |_, c| (str(c.captures[:quote]).absent? >> any).repeat(1) }.maybe.as(key) >>
106
- dynamic { |_, c| str(c.captures[:quote]) }
107
- }
108
- end
109
-
110
- def transformer
111
- self.class::Transformer.new rescue nil
112
- end
113
-
114
- end
115
-
116
- end
117
- end
118
- end
119
-
120
-