libis-tools 0.9.20 → 0.9.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +36 -233
  3. data/Rakefile +5 -0
  4. data/lib/libis/tools.rb +1 -0
  5. data/lib/libis/tools/assert.rb +11 -0
  6. data/lib/libis/tools/checksum.rb +22 -5
  7. data/lib/libis/tools/command.rb +24 -3
  8. data/lib/libis/tools/config.rb +61 -33
  9. data/lib/libis/tools/config_file.rb +0 -1
  10. data/lib/libis/tools/deep_struct.rb +10 -2
  11. data/lib/libis/tools/extend/empty.rb +2 -2
  12. data/lib/libis/tools/extend/hash.rb +37 -18
  13. data/lib/libis/tools/extend/kernel.rb +9 -0
  14. data/lib/libis/tools/extend/string.rb +17 -8
  15. data/lib/libis/tools/logger.rb +95 -44
  16. data/lib/libis/tools/metadata.rb +5 -1
  17. data/lib/libis/tools/metadata/dublin_core_record.rb +22 -4
  18. data/lib/libis/tools/metadata/field_format.rb +49 -9
  19. data/lib/libis/tools/metadata/fix_field.rb +5 -0
  20. data/lib/libis/tools/metadata/mapper.rb +2 -1
  21. data/lib/libis/tools/metadata/mappers/flandrica.rb +8 -1
  22. data/lib/libis/tools/metadata/mappers/kuleuven.rb +6 -2
  23. data/lib/libis/tools/metadata/marc21_record.rb +1 -0
  24. data/lib/libis/tools/metadata/marc_record.rb +31 -12
  25. data/lib/libis/tools/metadata/parser/basic_parser.rb +2 -0
  26. data/lib/libis/tools/metadata/parser/dublin_core_parser.rb +2 -1
  27. data/lib/libis/tools/metadata/parser/marc21_parser.rb +2 -1
  28. data/lib/libis/tools/metadata/parser/marc_format_parser.rb +2 -1
  29. data/lib/libis/tools/metadata/parser/marc_rules.rb +2 -1
  30. data/lib/libis/tools/metadata/parser/marc_select_parser.rb +2 -1
  31. data/lib/libis/tools/metadata/parser/patch.rb +1 -0
  32. data/lib/libis/tools/metadata/parser/subfield_criteria_parser.rb +2 -1
  33. data/lib/libis/tools/metadata/sharepoint_mapping.rb +1 -0
  34. data/lib/libis/tools/metadata/sharepoint_record.rb +2 -0
  35. data/lib/libis/tools/metadata/var_field.rb +8 -0
  36. data/lib/libis/tools/mets_dnx.rb +61 -0
  37. data/lib/libis/tools/mets_file.rb +87 -604
  38. data/lib/libis/tools/mets_objects.rb +534 -0
  39. data/lib/libis/tools/parameter.rb +144 -21
  40. data/lib/libis/tools/thread_safe.rb +31 -0
  41. data/lib/libis/tools/version.rb +1 -1
  42. data/lib/libis/tools/xml_document.rb +18 -24
  43. data/libis-tools.gemspec +6 -2
  44. data/spec/config_spec.rb +3 -4
  45. data/spec/logger_spec.rb +13 -30
  46. data/spec/mets_file_spec.rb +17 -17
  47. metadata +53 -7
@@ -6,10 +6,17 @@ module Libis
6
6
  module Tools
7
7
  module Metadata
8
8
 
9
+ # Convenience class to create and read DC records.
10
+ # Most of the functionality is derived from the {::Libis::Tools::XmlDocument} base class. This class puts its
11
+ # focus on supporting the <dc:xxx> and <dcterms:xxx> namespaces. For most tags the namespaces are added
12
+ # automatically by checking which tag you want to add. In some cases the same tag exists in both namespaces and
13
+ # you may want to state the namespace explicitly. Even then things are made as easy as possible.
9
14
  class DublinCoreRecord < Libis::Tools::XmlDocument
10
15
 
16
+ # List of known tags in the DC namespace
11
17
  DC_ELEMENTS = %w'contributor coverage creator date description format identifier language' +
12
18
  %w'publisher relation rights source subject title type'
19
+ # List of known tags in the DCTERMS namespace
13
20
  DCTERMS_ELEMENTS = %w'abstract accessRights accrualMethod accrualPeriodicity accrualPolicy alternative' +
14
21
  %w'audience available bibliographicCitation conformsTo contributor coverage created creator date' +
15
22
  %w'dateAccepted dateCopyrighted dateSubmitted description educationLevel extent format hasFormat' +
@@ -18,6 +25,11 @@ module Libis
18
25
  %w'references relation replaces requires rights rightsHolder source spatial subject tableOfContents' +
19
26
  %w'temporal title type valid'
20
27
 
28
+ # Create new DC document.
29
+ # If the doc parameter is nil a new empty DC document will be created with the dc:record root element and all
30
+ # required namespaces defined.
31
+ # @note The input document is not checked if it is a valid DC record XML.
32
+ # @param [::Libis::Tools::XmlDocument,String,IO,Hash] doc optional document to read.
21
33
  def initialize(doc = nil)
22
34
  super()
23
35
  xml_doc = case doc
@@ -45,10 +57,9 @@ module Libis
45
57
  raise ArgumentError, 'XML document not valid.' if self.invalid?
46
58
  end
47
59
 
48
- def all
49
- @all_records ||= get_all_records
50
- end
51
-
60
+ # Search the document with xpath.
61
+ # If no namespace is present, the 'dc:' namespace will be added.
62
+ # @param [String] path any valid XPath expression
52
63
  def xpath(path)
53
64
  m = /^([\/.]*\/)?(dc(terms)?:)?(.*)/.match(path.to_s)
54
65
  return [] unless m[4]
@@ -56,6 +67,13 @@ module Libis
56
67
  @document.xpath(path.to_s)
57
68
  end
58
69
 
70
+ # Add a node.
71
+ # You can omit the namespace in the name parameter. The method will add the correct namespace for you. If using
72
+ # symbols for name, an underscore ('_') can be used as separator instead of the colon (':').
73
+ # @param [String,Symbol] name tag name of the element
74
+ # @param [String] value content of the new element
75
+ # @param [Nokogiri::XML::Node] parent the new element will be attached to this node
76
+ # @param [Hash] attributes list of <attribute_name>, <attribute_value> pairs for the new element
59
77
  def add_node(name, value = nil, parent = nil, attributes = {})
60
78
  ns, tag = get_namespace(name.to_s)
61
79
  (attributes[:namespaces] ||= {})[:node_ns] ||= ns if ns
@@ -4,18 +4,56 @@ module Libis
4
4
  module Tools
5
5
  module Metadata
6
6
 
7
+ # Helper class for formatting field data.
8
+ #
9
+ # The FieldFormat class can omit prefix and/or postfix if no data is present and omits the join string if only
10
+ # one data element is present.
7
11
  class FieldFormat
8
12
 
13
+ # [Array] the list that makes up the data
9
14
  attr_accessor :parts
15
+
16
+ # [String] the text that will be placed in front of the generated text
10
17
  attr_accessor :prefix
11
- attr_accessor :join
18
+
19
+ # [String] the text that will be placed at the end of the generated text
12
20
  attr_accessor :postfix
13
21
 
22
+ # [String] the text used between the parts of the data
23
+ attr_accessor :join
24
+
25
+ # Create new formatter
26
+ #
27
+ # The method takes any number of arguments and processes them as data parts. If the last one is a Hash, it is
28
+ # interpreted as options hash. The data parts can either be given as an Array or set of arguments or within the
29
+ # options hash with key +:parts+.
30
+ #
31
+ # On each element in the data set the formatter will call the #to_s method to
32
+ # give each data object the opportunity to process its data.
33
+ #
34
+ # @param [Array, Hash] parts whatever makes up the data to be formatted.
14
35
  def initialize(*parts)
15
36
  @parts = []
16
37
  self[*parts]
17
38
  end
18
39
 
40
+ # Parses the arguments, stripping off an optional last Hash as options.
41
+ # @param (see #initialize)
42
+ def [](*parts)
43
+ options = parts.last.is_a?(Hash) ? parts.pop : {}
44
+ add parts
45
+ x = options.delete(:parts)
46
+ add x if x
47
+ add_options options
48
+ end
49
+
50
+ # Set options.
51
+ #
52
+ # Besides the three options +:prefix+, +:postfix+ and +:join+ it also accepts the option +:fix+. This combines
53
+ # both +:prefix+ and +:postfix+ options by specifying "<prefix>|<postfix>". If both prefix and postfix are only
54
+ # 1 character wide the format "<prefix><postfix>" is also allowed.
55
+ #
56
+ # @param [Hash] options the options list
19
57
  def add_options(options = {})
20
58
  if options[:fix]
21
59
  if options[:fix].size == 2
@@ -30,6 +68,10 @@ module Libis
30
68
  self
31
69
  end
32
70
 
71
+ # Add default options.
72
+ # (see #add_options)
73
+ # None of these options will be set if they are already set. If you need to overwrite them, use {#add_options}.
74
+ # @param (see #add_options)
33
75
  def add_default_options(options = {})
34
76
  options.delete(:prefix) if @prefix
35
77
  options.delete(:postfix) if @postfix
@@ -38,18 +80,14 @@ module Libis
38
80
  add_options options
39
81
  end
40
82
 
41
- def [](*parts)
42
- options = parts.last.is_a?(Hash) ? parts.pop : {}
43
- add parts
44
- x = options.delete(:parts)
45
- add x if x
46
- add_options options
47
- end
48
-
83
+ # Shortcut class method for initializer
49
84
  def self.from(*h)
50
85
  self.new(*h)
51
86
  end
52
87
 
88
+ # The real formatter method.
89
+ # This method parses the data and applies the options to generate the formatted string.
90
+ # @return [String] the formatted string
53
91
  def to_s
54
92
  @parts.delete_if { |x|
55
93
  x.nil? or
@@ -63,6 +101,8 @@ module Libis
63
101
  result
64
102
  end
65
103
 
104
+ protected
105
+
66
106
  def add(part)
67
107
  case part
68
108
  when Hash
@@ -4,16 +4,21 @@ module Libis
4
4
  module Tools
5
5
  module Metadata
6
6
 
7
+ # Helper class for implementing a fixed field for MARC
7
8
  class FixField
8
9
 
9
10
  attr_reader :tag
10
11
  attr_accessor :datas
11
12
 
13
+ # Create new fixed field
14
+ # @param [String] tag tag
15
+ # @param [String] datas field data
12
16
  def initialize(tag, datas)
13
17
  @tag = tag
14
18
  @datas = datas || ''
15
19
  end
16
20
 
21
+
17
22
  def [](from = nil, to = nil)
18
23
  return @datas unless from
19
24
  to ? @datas[from..to] : @datas[from]
@@ -8,8 +8,9 @@ require_relative 'parsers'
8
8
  module Libis
9
9
  module Tools
10
10
  module Metadata
11
-
12
11
  # noinspection RubyResolve
12
+
13
+ # New style parsers and converters for metadata. New, not finished and untested.
13
14
  class Mapper
14
15
 
15
16
  attr_reader :target_parser, :selection_parser, :format_parser
@@ -8,8 +8,15 @@ module Libis
8
8
  module Mappers
9
9
 
10
10
  # noinspection RubyResolve
11
+
12
+ # Mixin for {::Libis::Tools::Metadata::MarcRecord} to enable conversion into
13
+ # {Libis::Tools::Metadata::DublinCoreRecord}. This module implements the conversion mapping for Flandrica by
14
+ # extending the version for {::Libis::Tools::Metadata::Mappers::Kuleuven KU Leuven} and overwriting what's
15
+ # different. This means any change to the KU Leuven mapping may have effect on this mapping as well.
11
16
  module Flandrica
12
- include Libis::Tools::Metadata::Mappers::Kuleuven
17
+ extend Libis::Tools::Metadata::Mappers::Kuleuven
18
+
19
+ protected
13
20
 
14
21
  def marc2dc_identifier(xml)
15
22
  Libis::Tools::Metadata::Mappers::Kuleuven.marc2dc_identifier(xml)
@@ -8,10 +8,15 @@ module Libis
8
8
  module Tools
9
9
  module Metadata
10
10
  module Mappers
11
-
12
11
  # noinspection RubyResolve
12
+
13
+ # Mixin for {::Libis::Tools::Metadata::MarcRecord} to enable conversion into
14
+ # {Libis::Tools::Metadata::DublinCoreRecord}. This module implements the conversion mapping for KU Leuven.
13
15
  module Kuleuven
14
16
 
17
+ # Main conversion method.
18
+ # @param [String] label optional extra identifier to add to the DC record.
19
+ # @return [::Libis::Tools::Metadata::DublinCoreRecord]
15
20
  def to_dc(label = nil)
16
21
  assert(self.is_a? Libis::Tools::Metadata::MarcRecord)
17
22
 
@@ -1400,7 +1405,6 @@ module Libis
1400
1405
  if DOLLAR4TABLE[data.tag].has_key? code
1401
1406
  return DOLLAR4TABLE[data.tag][code][1]
1402
1407
  end
1403
- Application.logger.warn(self.class) { "Did not find $4 value in lookuptable: #{data.dump_line}" }
1404
1408
  :contributor
1405
1409
  end
1406
1410
 
@@ -8,6 +8,7 @@ module Libis
8
8
  module Tools
9
9
  module Metadata
10
10
 
11
+ # This class implements the missing private method 'get_all_records' to accommodate the MARC-XML format.
11
12
  class Marc21Record < Libis::Tools::Metadata::MarcRecord
12
13
 
13
14
  private
@@ -85,11 +85,19 @@ module Libis
85
85
 
86
86
  alias_method :each_tag, :all_tags
87
87
 
88
+ # Get all fields matching search criteria.
89
+ # As {#all_tags} but without subfield criteria.
90
+ # @param [String] tag Tag selection string. Tag name with indicators, '#' for wildcard, '_' for blank. If an
91
+ # extra subfield name is added, a result will be created for each instance found of that subfield.
92
+ # @param [Proc] select_block block that will be executed once for each field found. The block takes one argument
93
+ # (the field) and should return true or false. True selects the field, false rejects it.
94
+ # @return [Array] If a block was supplied to the method call, the array will contain the result of the block
95
+ # for each tag found. Otherwise the array will just contain the data for each matching tag.
88
96
  def select_fields(tag, select_block = nil, &block)
89
97
  all_tags(tag, nil, select_block, &block)
90
98
  end
91
99
 
92
- # Find the first field matching the criteria.
100
+ # Find the first tag matching the criteria.
93
101
  #
94
102
  # If a block is supplied, it will be called with the found field data. The return value will be whatever the
95
103
  # block returns. If no block is supplied, the field data will be returned. If nothing was found, the return
@@ -105,34 +113,43 @@ module Libis
105
113
  yield result
106
114
  end
107
115
 
116
+ # Find all fields matching the criteria.
117
+ # (see #first_tag)
118
+ # @param (see #first_tag)
108
119
  def all_fields(tag, subfields)
109
- r = all_tags(tag, subfields).collect { |tag| tag.subfields_array(subfields) }.flatten.compact
120
+ r = all_tags(tag, subfields).collect { |t| t.subfields_array(subfields) }.flatten.compact
110
121
  return r unless block_given?
111
122
  r.map { |field| yield field }
112
123
  r.size > 0
113
124
  end
114
125
 
115
- def first_field(t, s)
116
- result = all_fields(t, s).first
126
+ # Find the first field matching the criteria
127
+ # (see #all_fields)
128
+ # @param (see #all_fields)
129
+ def first_field(tag, subfields)
130
+ result = all_fields(tag, subfields).first
117
131
  return result unless block_given?
118
132
  return false unless result
119
133
  yield result
120
134
  true
121
135
  end
122
136
 
123
-
124
- def each_field(t, s)
125
- all_fields(t, s).each do |field|
137
+ # Perform action on each field found. Code block required.
138
+ # @param (see #all_fields)
139
+ def each_field(tag, subfields)
140
+ all_fields(tag, subfields).each do |field|
126
141
  yield field
127
142
  end
128
143
  end
129
144
 
145
+ # Dump content to string.
130
146
  def marc_dump
131
147
  all.values.flatten.each_with_object([]) { |record, m| m << record.dump }.join
132
148
  end
133
149
 
150
+ # Save the current MARC record to file.
151
+ # @param [String] filename name of the file
134
152
  def save(filename)
135
-
136
153
  doc = ::Libis::Tools::XmlDocument.new
137
154
  doc.root = @node
138
155
 
@@ -142,23 +159,25 @@ module Libis
142
159
  ::Nokogiri::XML::Node::SaveOptions::AS_XML |
143
160
  ::Nokogiri::XML::Node::SaveOptions::FORMAT
144
161
  )
145
-
146
162
  end
147
163
 
164
+ # Load XML document from file and create a new {MarcRecord} for it.
165
+ # @param [String] filename name of XML Marc file
148
166
  def self.load(filename)
149
-
150
167
  doc = ::Libis::Tools::XmlDocument.open(filename)
151
168
  self.new(doc.root)
152
-
153
169
  end
154
170
 
171
+ # Load XML document from stream and create a new {MarcRecord} for it.
172
+ # @param [IO,String] io input stream
155
173
  def self.read(io)
156
174
  io = StringIO.new(io) if io.is_a? String
157
175
  doc = ::Libis::Tools::XmlDocument.parse(io)
158
176
  self.new(doc.root)
159
-
160
177
  end
161
178
 
179
+ # Dump Marc record in Aleph Sequential format
180
+ # @return [String] Aleph sequential output
162
181
  def to_aseq
163
182
  record = ''
164
183
  doc_number = tag('001').datas
@@ -7,6 +7,8 @@ module Libis
7
7
  module Tools
8
8
  module Metadata
9
9
  # noinspection RubyResolve
10
+
11
+ # New style parsers and converters for metadata. New, not finished and untested.
10
12
  class BasicParser < Parslet::Parser
11
13
  # space
12
14
  rule(:space) { match('\s') }
@@ -7,8 +7,9 @@ require_relative 'basic_parser'
7
7
  module Libis
8
8
  module Tools
9
9
  module Metadata
10
-
11
10
  # noinspection RubyResolve
11
+
12
+ # New style parsers and converters for metadata. New, not finished and untested.
12
13
  class DublinCoreParser < Libis::Tools::Metadata::BasicParser
13
14
  rule(:namespace) { match('[^:]').repeat(1).as(:namespace) >> str(':') }
14
15
  rule(:namespace?) { namespace.maybe }
@@ -8,8 +8,9 @@ require_relative 'marc_rules'
8
8
  module Libis
9
9
  module Tools
10
10
  module Metadata
11
-
12
11
  # noinspection RubyResolve
12
+
13
+ # New style parsers and converters for metadata. New, not finished and untested.
13
14
  class Marc21Parser < Libis::Tools::Metadata::BasicParser
14
15
 
15
16
  root(:marc21)
@@ -8,8 +8,9 @@ require_relative 'marc_rules'
8
8
  module Libis
9
9
  module Tools
10
10
  module Metadata
11
-
12
11
  # noinspection RubyResolve
12
+
13
+ # New style parsers and converters for metadata. New, not finished and untested.
13
14
  class MarcFormatParser < Libis::Tools::Metadata::BasicParser
14
15
  include Libis::Tools::Metadata::MarcRules
15
16
 
@@ -5,8 +5,9 @@ require 'parslet'
5
5
  module Libis
6
6
  module Tools
7
7
  module Metadata
8
-
9
8
  # noinspection RubyResolve
9
+
10
+ # New style parsers and converters for metadata. New, not finished and untested.
10
11
  module MarcRules
11
12
  include Parslet
12
13
 
@@ -8,8 +8,9 @@ require_relative 'marc_rules'
8
8
  module Libis
9
9
  module Tools
10
10
  module Metadata
11
-
12
11
  # noinspection RubyResolve
12
+
13
+ # New style parsers and converters for metadata. New, not finished and untested.
13
14
  class MarcSelectParser < Libis::Tools::Metadata::BasicParser
14
15
  include Libis::Tools::Metadata::MarcRules
15
16
  root(:MARC)
@@ -1,3 +1,4 @@
1
+ # New style parsers and converters for metadata. New, not finished and untested.
1
2
  class Parslet::Pattern
2
3
 
3
4
  def element_match_hash(tree, exp, bindings)
@@ -7,8 +7,9 @@ require_relative 'basic_parser'
7
7
  module Libis
8
8
  module Tools
9
9
  module Metadata
10
-
11
10
  # noinspection RubyResolve
11
+
12
+ # New style parsers and converters for metadata. New, not finished and untested.
12
13
  class SubfieldCriteriaParser < Libis::Tools::Metadata::BasicParser
13
14
 
14
15
  root(:criteria)
@@ -8,6 +8,7 @@ require 'libis/tools/extend/hash'
8
8
  module Libis
9
9
  module Tools
10
10
 
11
+ # Copy of old Sharepoint mapping class. Needs inspection and probably a major update.
11
12
  class SharepointMapping < Hash
12
13
 
13
14
  def initialize(mapping_file)
@@ -9,6 +9,8 @@ module Libis
9
9
  module Tools
10
10
 
11
11
  # noinspection RubyTooManyMethodsInspection
12
+
13
+ # Copy of the old SharepointRecord class. Needs inspection and probably a major update.
12
14
  class SharepointRecord < Hash
13
15
 
14
16
  attr_accessor :node