libis-tools 0.9.20 → 0.9.21

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +36 -233
  3. data/Rakefile +5 -0
  4. data/lib/libis/tools.rb +1 -0
  5. data/lib/libis/tools/assert.rb +11 -0
  6. data/lib/libis/tools/checksum.rb +22 -5
  7. data/lib/libis/tools/command.rb +24 -3
  8. data/lib/libis/tools/config.rb +61 -33
  9. data/lib/libis/tools/config_file.rb +0 -1
  10. data/lib/libis/tools/deep_struct.rb +10 -2
  11. data/lib/libis/tools/extend/empty.rb +2 -2
  12. data/lib/libis/tools/extend/hash.rb +37 -18
  13. data/lib/libis/tools/extend/kernel.rb +9 -0
  14. data/lib/libis/tools/extend/string.rb +17 -8
  15. data/lib/libis/tools/logger.rb +95 -44
  16. data/lib/libis/tools/metadata.rb +5 -1
  17. data/lib/libis/tools/metadata/dublin_core_record.rb +22 -4
  18. data/lib/libis/tools/metadata/field_format.rb +49 -9
  19. data/lib/libis/tools/metadata/fix_field.rb +5 -0
  20. data/lib/libis/tools/metadata/mapper.rb +2 -1
  21. data/lib/libis/tools/metadata/mappers/flandrica.rb +8 -1
  22. data/lib/libis/tools/metadata/mappers/kuleuven.rb +6 -2
  23. data/lib/libis/tools/metadata/marc21_record.rb +1 -0
  24. data/lib/libis/tools/metadata/marc_record.rb +31 -12
  25. data/lib/libis/tools/metadata/parser/basic_parser.rb +2 -0
  26. data/lib/libis/tools/metadata/parser/dublin_core_parser.rb +2 -1
  27. data/lib/libis/tools/metadata/parser/marc21_parser.rb +2 -1
  28. data/lib/libis/tools/metadata/parser/marc_format_parser.rb +2 -1
  29. data/lib/libis/tools/metadata/parser/marc_rules.rb +2 -1
  30. data/lib/libis/tools/metadata/parser/marc_select_parser.rb +2 -1
  31. data/lib/libis/tools/metadata/parser/patch.rb +1 -0
  32. data/lib/libis/tools/metadata/parser/subfield_criteria_parser.rb +2 -1
  33. data/lib/libis/tools/metadata/sharepoint_mapping.rb +1 -0
  34. data/lib/libis/tools/metadata/sharepoint_record.rb +2 -0
  35. data/lib/libis/tools/metadata/var_field.rb +8 -0
  36. data/lib/libis/tools/mets_dnx.rb +61 -0
  37. data/lib/libis/tools/mets_file.rb +87 -604
  38. data/lib/libis/tools/mets_objects.rb +534 -0
  39. data/lib/libis/tools/parameter.rb +144 -21
  40. data/lib/libis/tools/thread_safe.rb +31 -0
  41. data/lib/libis/tools/version.rb +1 -1
  42. data/lib/libis/tools/xml_document.rb +18 -24
  43. data/libis-tools.gemspec +6 -2
  44. data/spec/config_spec.rb +3 -4
  45. data/spec/logger_spec.rb +13 -30
  46. data/spec/mets_file_spec.rb +17 -17
  47. metadata +53 -7
@@ -6,10 +6,17 @@ module Libis
6
6
  module Tools
7
7
  module Metadata
8
8
 
9
+ # Convenience class to create and read DC records.
10
+ # Most of the functionality is derived from the {::Libis::Tools::XmlDocument} base class. This class puts its
11
+ # focus on supporting the <dc:xxx> and <dcterms:xxx> namespaces. For most tags the namespaces are added
12
+ # automatically by checking which tag you want to add. In some cases the same tag exists in both namespaces and
13
+ # you may want to state the namespace explicitly. Even then things are made as easy as possible.
9
14
  class DublinCoreRecord < Libis::Tools::XmlDocument
10
15
 
16
+ # List of known tags in the DC namespace
11
17
  DC_ELEMENTS = %w'contributor coverage creator date description format identifier language' +
12
18
  %w'publisher relation rights source subject title type'
19
+ # List of known tags in the DCTERMS namespace
13
20
  DCTERMS_ELEMENTS = %w'abstract accessRights accrualMethod accrualPeriodicity accrualPolicy alternative' +
14
21
  %w'audience available bibliographicCitation conformsTo contributor coverage created creator date' +
15
22
  %w'dateAccepted dateCopyrighted dateSubmitted description educationLevel extent format hasFormat' +
@@ -18,6 +25,11 @@ module Libis
18
25
  %w'references relation replaces requires rights rightsHolder source spatial subject tableOfContents' +
19
26
  %w'temporal title type valid'
20
27
 
28
+ # Create new DC document.
29
+ # If the doc parameter is nil a new empty DC document will be created with the dc:record root element and all
30
+ # required namespaces defined.
31
+ # @note The input document is not checked if it is a valid DC record XML.
32
+ # @param [::Libis::Tools::XmlDocument,String,IO,Hash] doc optional document to read.
21
33
  def initialize(doc = nil)
22
34
  super()
23
35
  xml_doc = case doc
@@ -45,10 +57,9 @@ module Libis
45
57
  raise ArgumentError, 'XML document not valid.' if self.invalid?
46
58
  end
47
59
 
48
- def all
49
- @all_records ||= get_all_records
50
- end
51
-
60
+ # Search the document with xpath.
61
+ # If no namespace is present, the 'dc:' namespace will be added.
62
+ # @param [String] path any valid XPath expression
52
63
  def xpath(path)
53
64
  m = /^([\/.]*\/)?(dc(terms)?:)?(.*)/.match(path.to_s)
54
65
  return [] unless m[4]
@@ -56,6 +67,13 @@ module Libis
56
67
  @document.xpath(path.to_s)
57
68
  end
58
69
 
70
+ # Add a node.
71
+ # You can omit the namespace in the name parameter. The method will add the correct namespace for you. If using
72
+ # symbols for name, an underscore ('_') can be used as separator instead of the colon (':').
73
+ # @param [String,Symbol] name tag name of the element
74
+ # @param [String] value content of the new element
75
+ # @param [Nokogiri::XML::Node] parent the new element will be attached to this node
76
+ # @param [Hash] attributes list of <attribute_name>, <attribute_value> pairs for the new element
59
77
  def add_node(name, value = nil, parent = nil, attributes = {})
60
78
  ns, tag = get_namespace(name.to_s)
61
79
  (attributes[:namespaces] ||= {})[:node_ns] ||= ns if ns
@@ -4,18 +4,56 @@ module Libis
4
4
  module Tools
5
5
  module Metadata
6
6
 
7
+ # Helper class for formatting field data.
8
+ #
9
+ # The FieldFormat class can omit prefix and/or postfix if no data is present and omits the join string if only
10
+ # one data element is present.
7
11
  class FieldFormat
8
12
 
13
+ # [Array] the list that makes up the data
9
14
  attr_accessor :parts
15
+
16
+ # [String] the text that will be placed in front of the generated text
10
17
  attr_accessor :prefix
11
- attr_accessor :join
18
+
19
+ # [String] the text that will be placed at the end of the generated text
12
20
  attr_accessor :postfix
13
21
 
22
+ # [String] the text used between the parts of the data
23
+ attr_accessor :join
24
+
25
+ # Create new formatter
26
+ #
27
+ # The method takes any number of arguments and processes them as data parts. If the last one is a Hash, it is
28
+ # interpreted as options hash. The data parts can either be given as an Array or set of arguments or within the
29
+ # options hash with key +:parts+.
30
+ #
31
+ # On each element in the data set the formatter will call the #to_s method to
32
+ # give each data object the opportunity to process its data.
33
+ #
34
+ # @param [Array, Hash] parts whatever makes up the data to be formatted.
14
35
  def initialize(*parts)
15
36
  @parts = []
16
37
  self[*parts]
17
38
  end
18
39
 
40
+ # Parses the arguments, stripping off an optional last Hash as options.
41
+ # @param (see #initialize)
42
+ def [](*parts)
43
+ options = parts.last.is_a?(Hash) ? parts.pop : {}
44
+ add parts
45
+ x = options.delete(:parts)
46
+ add x if x
47
+ add_options options
48
+ end
49
+
50
+ # Set options.
51
+ #
52
+ # Besides the three options +:prefix+, +:postfix+ and +:join+ it also accepts the option +:fix+. This combines
53
+ # both +:prefix+ and +:postfix+ options by specifying "<prefix>|<postfix>". If both prefix and postfix are only
54
+ # 1 character wide the format "<prefix><postfix>" is also allowed.
55
+ #
56
+ # @param [Hash] options the options list
19
57
  def add_options(options = {})
20
58
  if options[:fix]
21
59
  if options[:fix].size == 2
@@ -30,6 +68,10 @@ module Libis
30
68
  self
31
69
  end
32
70
 
71
+ # Add default options.
72
+ # (see #add_options)
73
+ # None of these options will be set if they are already set. If you need to overwrite them, use {#add_options}.
74
+ # @param (see #add_options)
33
75
  def add_default_options(options = {})
34
76
  options.delete(:prefix) if @prefix
35
77
  options.delete(:postfix) if @postfix
@@ -38,18 +80,14 @@ module Libis
38
80
  add_options options
39
81
  end
40
82
 
41
- def [](*parts)
42
- options = parts.last.is_a?(Hash) ? parts.pop : {}
43
- add parts
44
- x = options.delete(:parts)
45
- add x if x
46
- add_options options
47
- end
48
-
83
+ # Shortcut class method for initializer
49
84
  def self.from(*h)
50
85
  self.new(*h)
51
86
  end
52
87
 
88
+ # The real formatter method.
89
+ # This method parses the data and applies the options to generate the formatted string.
90
+ # @return [String] the formatted string
53
91
  def to_s
54
92
  @parts.delete_if { |x|
55
93
  x.nil? or
@@ -63,6 +101,8 @@ module Libis
63
101
  result
64
102
  end
65
103
 
104
+ protected
105
+
66
106
  def add(part)
67
107
  case part
68
108
  when Hash
@@ -4,16 +4,21 @@ module Libis
4
4
  module Tools
5
5
  module Metadata
6
6
 
7
+ # Helper class for implementing a fixed field for MARC
7
8
  class FixField
8
9
 
9
10
  attr_reader :tag
10
11
  attr_accessor :datas
11
12
 
13
+ # Create new fixed field
14
+ # @param [String] tag tag
15
+ # @param [String] datas field data
12
16
  def initialize(tag, datas)
13
17
  @tag = tag
14
18
  @datas = datas || ''
15
19
  end
16
20
 
21
+
17
22
  def [](from = nil, to = nil)
18
23
  return @datas unless from
19
24
  to ? @datas[from..to] : @datas[from]
@@ -8,8 +8,9 @@ require_relative 'parsers'
8
8
  module Libis
9
9
  module Tools
10
10
  module Metadata
11
-
12
11
  # noinspection RubyResolve
12
+
13
+ # New style parsers and converters for metadata. New, not finished and untested.
13
14
  class Mapper
14
15
 
15
16
  attr_reader :target_parser, :selection_parser, :format_parser
@@ -8,8 +8,15 @@ module Libis
8
8
  module Mappers
9
9
 
10
10
  # noinspection RubyResolve
11
+
12
+ # Mixin for {::Libis::Tools::Metadata::MarcRecord} to enable conversion into
13
+ # {Libis::Tools::Metadata::DublinCoreRecord}. This module implements the conversion mapping for Flandrica by
14
+ # extending the version for {::Libis::Tools::Metadata::Mappers::Kuleuven KU Leuven} and overwriting what's
15
+ # different. This means any change to the KU Leuven mapping may have effect on this mapping as well.
11
16
  module Flandrica
12
- include Libis::Tools::Metadata::Mappers::Kuleuven
17
+ extend Libis::Tools::Metadata::Mappers::Kuleuven
18
+
19
+ protected
13
20
 
14
21
  def marc2dc_identifier(xml)
15
22
  Libis::Tools::Metadata::Mappers::Kuleuven.marc2dc_identifier(xml)
@@ -8,10 +8,15 @@ module Libis
8
8
  module Tools
9
9
  module Metadata
10
10
  module Mappers
11
-
12
11
  # noinspection RubyResolve
12
+
13
+ # Mixin for {::Libis::Tools::Metadata::MarcRecord} to enable conversion into
14
+ # {Libis::Tools::Metadata::DublinCoreRecord}. This module implements the conversion mapping for KU Leuven.
13
15
  module Kuleuven
14
16
 
17
+ # Main conversion method.
18
+ # @param [String] label optional extra identifier to add to the DC record.
19
+ # @return [::Libis::Tools::Metadata::DublinCoreRecord]
15
20
  def to_dc(label = nil)
16
21
  assert(self.is_a? Libis::Tools::Metadata::MarcRecord)
17
22
 
@@ -1400,7 +1405,6 @@ module Libis
1400
1405
  if DOLLAR4TABLE[data.tag].has_key? code
1401
1406
  return DOLLAR4TABLE[data.tag][code][1]
1402
1407
  end
1403
- Application.logger.warn(self.class) { "Did not find $4 value in lookuptable: #{data.dump_line}" }
1404
1408
  :contributor
1405
1409
  end
1406
1410
 
@@ -8,6 +8,7 @@ module Libis
8
8
  module Tools
9
9
  module Metadata
10
10
 
11
+ # This class implements the missing private method 'get_all_records' to accommodate the MARC-XML format.
11
12
  class Marc21Record < Libis::Tools::Metadata::MarcRecord
12
13
 
13
14
  private
@@ -85,11 +85,19 @@ module Libis
85
85
 
86
86
  alias_method :each_tag, :all_tags
87
87
 
88
+ # Get all fields matching search criteria.
89
+ # As {#all_tags} but without subfield criteria.
90
+ # @param [String] tag Tag selection string. Tag name with indicators, '#' for wildcard, '_' for blank. If an
91
+ # extra subfield name is added, a result will be created for each instance found of that subfield.
92
+ # @param [Proc] select_block block that will be executed once for each field found. The block takes one argument
93
+ # (the field) and should return true or false. True selects the field, false rejects it.
94
+ # @return [Array] If a block was supplied to the method call, the array will contain the result of the block
95
+ # for each tag found. Otherwise the array will just contain the data for each matching tag.
88
96
  def select_fields(tag, select_block = nil, &block)
89
97
  all_tags(tag, nil, select_block, &block)
90
98
  end
91
99
 
92
- # Find the first field matching the criteria.
100
+ # Find the first tag matching the criteria.
93
101
  #
94
102
  # If a block is supplied, it will be called with the found field data. The return value will be whatever the
95
103
  # block returns. If no block is supplied, the field data will be returned. If nothing was found, the return
@@ -105,34 +113,43 @@ module Libis
105
113
  yield result
106
114
  end
107
115
 
116
+ # Find all fields matching the criteria.
117
+ # (see #first_tag)
118
+ # @param (see #first_tag)
108
119
  def all_fields(tag, subfields)
109
- r = all_tags(tag, subfields).collect { |tag| tag.subfields_array(subfields) }.flatten.compact
120
+ r = all_tags(tag, subfields).collect { |t| t.subfields_array(subfields) }.flatten.compact
110
121
  return r unless block_given?
111
122
  r.map { |field| yield field }
112
123
  r.size > 0
113
124
  end
114
125
 
115
- def first_field(t, s)
116
- result = all_fields(t, s).first
126
+ # Find the first field matching the criteria
127
+ # (see #all_fields)
128
+ # @param (see #all_fields)
129
+ def first_field(tag, subfields)
130
+ result = all_fields(tag, subfields).first
117
131
  return result unless block_given?
118
132
  return false unless result
119
133
  yield result
120
134
  true
121
135
  end
122
136
 
123
-
124
- def each_field(t, s)
125
- all_fields(t, s).each do |field|
137
+ # Perform action on each field found. Code block required.
138
+ # @param (see #all_fields)
139
+ def each_field(tag, subfields)
140
+ all_fields(tag, subfields).each do |field|
126
141
  yield field
127
142
  end
128
143
  end
129
144
 
145
+ # Dump content to string.
130
146
  def marc_dump
131
147
  all.values.flatten.each_with_object([]) { |record, m| m << record.dump }.join
132
148
  end
133
149
 
150
+ # Save the current MARC record to file.
151
+ # @param [String] filename name of the file
134
152
  def save(filename)
135
-
136
153
  doc = ::Libis::Tools::XmlDocument.new
137
154
  doc.root = @node
138
155
 
@@ -142,23 +159,25 @@ module Libis
142
159
  ::Nokogiri::XML::Node::SaveOptions::AS_XML |
143
160
  ::Nokogiri::XML::Node::SaveOptions::FORMAT
144
161
  )
145
-
146
162
  end
147
163
 
164
+ # Load XML document from file and create a new {MarcRecord} for it.
165
+ # @param [String] filename name of XML Marc file
148
166
  def self.load(filename)
149
-
150
167
  doc = ::Libis::Tools::XmlDocument.open(filename)
151
168
  self.new(doc.root)
152
-
153
169
  end
154
170
 
171
+ # Load XML document from stream and create a new {MarcRecord} for it.
172
+ # @param [IO,String] io input stream
155
173
  def self.read(io)
156
174
  io = StringIO.new(io) if io.is_a? String
157
175
  doc = ::Libis::Tools::XmlDocument.parse(io)
158
176
  self.new(doc.root)
159
-
160
177
  end
161
178
 
179
+ # Dump Marc record in Aleph Sequential format
180
+ # @return [String] Aleph sequential output
162
181
  def to_aseq
163
182
  record = ''
164
183
  doc_number = tag('001').datas
@@ -7,6 +7,8 @@ module Libis
7
7
  module Tools
8
8
  module Metadata
9
9
  # noinspection RubyResolve
10
+
11
+ # New style parsers and converters for metadata. New, not finished and untested.
10
12
  class BasicParser < Parslet::Parser
11
13
  # space
12
14
  rule(:space) { match('\s') }
@@ -7,8 +7,9 @@ require_relative 'basic_parser'
7
7
  module Libis
8
8
  module Tools
9
9
  module Metadata
10
-
11
10
  # noinspection RubyResolve
11
+
12
+ # New style parsers and converters for metadata. New, not finished and untested.
12
13
  class DublinCoreParser < Libis::Tools::Metadata::BasicParser
13
14
  rule(:namespace) { match('[^:]').repeat(1).as(:namespace) >> str(':') }
14
15
  rule(:namespace?) { namespace.maybe }
@@ -8,8 +8,9 @@ require_relative 'marc_rules'
8
8
  module Libis
9
9
  module Tools
10
10
  module Metadata
11
-
12
11
  # noinspection RubyResolve
12
+
13
+ # New style parsers and converters for metadata. New, not finished and untested.
13
14
  class Marc21Parser < Libis::Tools::Metadata::BasicParser
14
15
 
15
16
  root(:marc21)
@@ -8,8 +8,9 @@ require_relative 'marc_rules'
8
8
  module Libis
9
9
  module Tools
10
10
  module Metadata
11
-
12
11
  # noinspection RubyResolve
12
+
13
+ # New style parsers and converters for metadata. New, not finished and untested.
13
14
  class MarcFormatParser < Libis::Tools::Metadata::BasicParser
14
15
  include Libis::Tools::Metadata::MarcRules
15
16
 
@@ -5,8 +5,9 @@ require 'parslet'
5
5
  module Libis
6
6
  module Tools
7
7
  module Metadata
8
-
9
8
  # noinspection RubyResolve
9
+
10
+ # New style parsers and converters for metadata. New, not finished and untested.
10
11
  module MarcRules
11
12
  include Parslet
12
13
 
@@ -8,8 +8,9 @@ require_relative 'marc_rules'
8
8
  module Libis
9
9
  module Tools
10
10
  module Metadata
11
-
12
11
  # noinspection RubyResolve
12
+
13
+ # New style parsers and converters for metadata. New, not finished and untested.
13
14
  class MarcSelectParser < Libis::Tools::Metadata::BasicParser
14
15
  include Libis::Tools::Metadata::MarcRules
15
16
  root(:MARC)
@@ -1,3 +1,4 @@
1
+ # New style parsers and converters for metadata. New, not finished and untested.
1
2
  class Parslet::Pattern
2
3
 
3
4
  def element_match_hash(tree, exp, bindings)
@@ -7,8 +7,9 @@ require_relative 'basic_parser'
7
7
  module Libis
8
8
  module Tools
9
9
  module Metadata
10
-
11
10
  # noinspection RubyResolve
11
+
12
+ # New style parsers and converters for metadata. New, not finished and untested.
12
13
  class SubfieldCriteriaParser < Libis::Tools::Metadata::BasicParser
13
14
 
14
15
  root(:criteria)
@@ -8,6 +8,7 @@ require 'libis/tools/extend/hash'
8
8
  module Libis
9
9
  module Tools
10
10
 
11
+ # Copy of old Sharepoint mapping class. Needs inspection and probably a major update.
11
12
  class SharepointMapping < Hash
12
13
 
13
14
  def initialize(mapping_file)
@@ -9,6 +9,8 @@ module Libis
9
9
  module Tools
10
10
 
11
11
  # noinspection RubyTooManyMethodsInspection
12
+
13
+ # Copy of the old SharepointRecord class. Needs inspection and probably a major update.
12
14
  class SharepointRecord < Hash
13
15
 
14
16
  attr_accessor :node