libis-tools 1.0.5-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.gitignore +16 -0
- data/.rspec +2 -0
- data/.travis.yml +40 -0
- data/Gemfile +7 -0
- data/README.md +202 -0
- data/Rakefile +11 -0
- data/bin/libis_tool +5 -0
- data/lib/libis-tools.rb +1 -0
- data/lib/libis/tools.rb +25 -0
- data/lib/libis/tools/assert.rb +52 -0
- data/lib/libis/tools/checksum.rb +106 -0
- data/lib/libis/tools/cli/cli_helper.rb +189 -0
- data/lib/libis/tools/cli/reorg.rb +416 -0
- data/lib/libis/tools/command.rb +133 -0
- data/lib/libis/tools/command_line.rb +23 -0
- data/lib/libis/tools/config.rb +147 -0
- data/lib/libis/tools/config_file.rb +85 -0
- data/lib/libis/tools/csv.rb +38 -0
- data/lib/libis/tools/deep_struct.rb +71 -0
- data/lib/libis/tools/extend/array.rb +16 -0
- data/lib/libis/tools/extend/empty.rb +7 -0
- data/lib/libis/tools/extend/hash.rb +147 -0
- data/lib/libis/tools/extend/kernel.rb +25 -0
- data/lib/libis/tools/extend/ostruct.rb +3 -0
- data/lib/libis/tools/extend/roo.rb +91 -0
- data/lib/libis/tools/extend/string.rb +94 -0
- data/lib/libis/tools/extend/struct.rb +29 -0
- data/lib/libis/tools/extend/symbol.rb +8 -0
- data/lib/libis/tools/logger.rb +130 -0
- data/lib/libis/tools/mets_dnx.rb +61 -0
- data/lib/libis/tools/mets_file.rb +504 -0
- data/lib/libis/tools/mets_objects.rb +547 -0
- data/lib/libis/tools/parameter.rb +372 -0
- data/lib/libis/tools/spreadsheet.rb +196 -0
- data/lib/libis/tools/temp_file.rb +42 -0
- data/lib/libis/tools/thread_safe.rb +31 -0
- data/lib/libis/tools/version.rb +5 -0
- data/lib/libis/tools/xml_document.rb +583 -0
- data/libis-tools.gemspec +55 -0
- data/spec/assert_spec.rb +65 -0
- data/spec/checksum_spec.rb +68 -0
- data/spec/command_spec.rb +90 -0
- data/spec/config_file_spec.rb +83 -0
- data/spec/config_spec.rb +113 -0
- data/spec/csv_spec.rb +159 -0
- data/spec/data/test-headers.csv +2 -0
- data/spec/data/test-headers.tsv +2 -0
- data/spec/data/test-noheaders.csv +1 -0
- data/spec/data/test-noheaders.tsv +1 -0
- data/spec/data/test.data +9 -0
- data/spec/data/test.xlsx +0 -0
- data/spec/data/test.xml +8 -0
- data/spec/data/test.yml +2 -0
- data/spec/data/test_config.yml +15 -0
- data/spec/deep_struct_spec.rb +138 -0
- data/spec/logger_spec.rb +165 -0
- data/spec/mets_file_spec.rb +223 -0
- data/spec/parameter_container_spec.rb +152 -0
- data/spec/parameter_spec.rb +148 -0
- data/spec/spec_helper.rb +29 -0
- data/spec/spreadsheet_spec.rb +1820 -0
- data/spec/temp_file_spec.rb +76 -0
- data/spec/test.xsd +20 -0
- data/spec/thread_safe_spec.rb +64 -0
- data/spec/xmldocument_spec.rb +421 -0
- data/test/test_helper.rb +7 -0
- data/test/webservices/test_ca_item_info.rb +59 -0
- data/test/webservices/test_ca_search.rb +35 -0
- metadata +437 -0
@@ -0,0 +1,372 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'date'
require 'json'
require 'libis/tools/extend/struct'
require 'concurrent/hash'
|
5
|
+
|
6
|
+
module Libis
|
7
|
+
module Tools
|
8
|
+
|
9
|
+
# Exception that will be raised when a parameter value does not pass the validation checks.
|
10
|
+
# Subclass of RuntimeError. Within this file it is raised by Parameter#convert (unsupported datatype or
# unrecognised boolean text) and by Parameter#check_constraint (value rejected by the constraint).
class ParameterValidationError < RuntimeError;
|
11
|
+
end
|
12
|
+
|
13
|
+
# Exception that will be raised when an attempt is made to change the value of a frozen parameter.
|
14
|
+
# Subclass of RuntimeError. Within this file it is raised by ParameterContainer#parameter when a value is
# set on a parameter whose :frozen property is truthy; ParameterContainer#[]= rescues and ignores it.
class ParameterFrozenError < RuntimeError;
|
15
|
+
end
|
16
|
+
|
17
|
+
# noinspection RubyConstantNamingConvention
|
18
|
+
|
19
|
+
# A {Parameter} is like a class instance attribute on steroids. Contrary to regular attributes, {Parameter}s are
|
20
|
+
# type-safe, can have a descriptive text explaining their use, a constraint that limits the values and any other
|
21
|
+
# properties for an application to use for their needs.
|
22
|
+
#
|
23
|
+
# Parameters are inherited from base classes and can be overwritten without affecting the parameters in the parent
|
24
|
+
# class. For instance, a regular parameter in the parent class can be given a fixed value in the child class by
|
25
|
+
# giving it a default value and setting its frozen property to true. The same parameter in the parent class
|
26
|
+
# instances will still be modifiable. But the parameter in the child class instances will be frozen, even if
|
27
|
+
# accessed via the methods on parent class.
|
28
|
+
#
|
29
|
+
# Important: the parameter will exist both on the class level as on the instance level, but the parameter on the
|
30
|
+
# class level is the parameter definition as described in the {Parameter} class. On the instance level, there are
|
31
|
+
# merely some parameter methods that access the parameter instance values with the help of the parameter definitions
|
32
|
+
# on the class. The implementation of the parameter instances is dealt with by the {ParameterContainer} module.
|
33
|
+
class Parameter < Struct.new(:name, :default, :datatype, :description, :constraint, :frozen, :options)

  # Create a Parameter instance.
  # @param [Array] args The positional values for, in order:
  #   * name - Required. Name of the parameter.
  #   * default - Any value. Returned as-is by {#parse} when no value is supplied. Default is nil.
  #   * datatype - String. One of: bool, string, int, float, datetime, array, hash. If omitted it is derived
  #     from the class of the default value ('string' when the default is nil).
  #   * description - String describing the parameter's use.
  #   * constraint - Array, Range, Regexp or single value. Default is nil meaning no constraint.
  #   * frozen - Boolean. Default is nil/false; when truthy, ParameterContainer#parameter refuses to set a value.
  #   * options - Any Hash. It is up to the application to interpret and use this info.
  def initialize(*args)
    super(*args)
    self[:options] ||= {}
    self[:datatype] ||= guess_datatype
  end

  # Duplicates the parameter.
  # The options Hash is deep-copied (Marshal round trip) so the copy can be changed independently.
  # @return [Parameter] the copy
  def dup
    new_obj = super
    # noinspection RubyResolve
    new_obj[:options] = Marshal.load(Marshal.dump(self[:options]))
    new_obj
  end

  # Merges other parameter data into the current parameter.
  # Regular properties are overwritten; the :options entry is merged into the existing options Hash.
  # @param [::Libis::Tools::Parameter, Hash] other parameter definition (or Hash) to copy properties from
  # @return [Parameter] self
  def merge!(other)
    # each_pair yields [property, value] for both Struct and Hash; Struct#each would yield values only.
    other.each_pair do |k, v|
      if k == :options
        self[:options].merge!(v) if v
      else
        self[k] = v
      end
    end
    self
  end

  # Retrieve a specific property of the parameter.
  # If the key is not one of the regular (Struct member) properties, the options Hash is consulted.
  # @param [Symbol] key name of the property
  # @return [Object] the property value, or nil when unknown
  def [](key)
    return super(key) if members.include?(key)
    self[:options][key]
  end

  # Set a property of the parameter.
  # If the property is not one of the regular properties, the property will be set in the options Hash.
  # @param [Symbol] key name of the property
  # @param [Object] value value for the property. No type checking happens on this value
  def []=(key, value)
    return super(key, value) if members.include?(key)
    self[:options][key] = value
  end

  # Convenience method to create a new {Parameter} from a Hash.
  # Keys may be Strings or Symbols. :name, :default and :datatype are handed to the constructor (so a missing
  # datatype is derived from the default); an :options Hash is merged into the options; every other key is
  # assigned through {#[]=}.
  # @param [Hash] h Hash with parameter definition properties
  # @return [Parameter] the new parameter
  def self.from_hash(h)
    props = h.each_with_object({}) { |(k, v), result| result[k.to_s.to_sym] = v }
    extra_options = props.delete(:options)
    param = new(props.delete(:name), props.delete(:default), props.delete(:datatype))
    param[:options].merge!(extra_options) if extra_options
    props.each { |k, v| param[k] = v }
    param
  end

  # Dumps the parameter properties into a Hash.
  # The entries of the options Hash are merged into the result instead of being nested under :options.
  #
  # @return [Hash] parameter definition properties
  def to_h
    # The (key, value) destructuring is required: the pair arrives as a single block argument.
    super().each_with_object({}) do |(key, value), hash|
      if key == :options
        hash.merge!(value) if value
      else
        hash[key] = value
      end
    end
  end

  # Valid input strings for boolean parameter value, all converted to 'true'
  TRUE_BOOL = %w'true yes t y 1'
  # Valid input strings for boolean parameter value, all converted to 'false'
  FALSE_BOOL = %w'false no f n 0'

  # Parse any value and try to convert to the correct datatype and check the constraints.
  # A nil value yields the (unconverted) default. An exception is raised if the value is not valid.
  # @param [Object] value Any value to parse, strings are best supported.
  # @return [Object] checked and converted value
  def parse(value = nil)
    result = value.nil? ? self[:default] : convert(value)
    check_constraint(result)
    result
  end

  # Parse any value and try to convert to the correct datatype and check the constraints.
  # @param [Object] value Any value to check
  # @return [Boolean] false if {#parse} raised a StandardError, true otherwise
  def valid_value?(value)
    parse(value)
    true
  rescue
    false
  end

  private

  # Derive the datatype name from the class of the default value, unless a datatype is already set.
  def guess_datatype
    self[:datatype] || case self[:default]
                       when TrueClass, FalseClass
                         'bool'
                       when NilClass
                         'string'
                       when Integer
                         'int'
                       when Float
                         'float'
                       when DateTime, Date, Time
                         'datetime'
                       when Array
                         'array'
                       when Hash
                         'hash'
                       else
                         self[:default].class.name.downcase
                       end
  end

  # Convert a value according to the (case-insensitive) datatype.
  # Returns nil when an 'array' or 'hash' value cannot be converted.
  # Raises ParameterValidationError for unknown datatypes and unrecognised boolean text.
  def convert(v)
    case self[:datatype].to_s.downcase
    when 'boolean', 'bool'
      return true if TRUE_BOOL.include?(v.to_s.downcase)
      return false if FALSE_BOOL.include?(v.to_s.downcase)
      raise ParameterValidationError, "No boolean information in '#{v.to_s}'. " +
          "Valid values are: '#{TRUE_BOOL.join('\', \'')}" +
          "' and '#{FALSE_BOOL.join('\', \'')}'."
    when 'string', 'nil'
      return v.to_s
    when 'int'
      return Integer(v)
    when 'float'
      return Float(v)
    when 'datetime'
      return v.to_datetime if v.respond_to? :to_datetime
      return DateTime.parse(v)
    when 'array'
      return v if v.is_a?(Array)
      # Strings are split on commas, semicolons, pipes and whitespace.
      return v.split(/[,;|\s]+/) if v.is_a?(String)
      # Alternatively:
      # return JSON.parse(v) if v.is_a?(String)
      return v.to_a if v.respond_to?(:to_a)
    when 'hash'
      return v if v.is_a?(Hash)
      # Arrays become a Hash keyed by element index.
      return Hash[(0...v.size).zip(v)] if v.is_a?(Array)
      return JSON.parse(v) if v.is_a?(String)
    else
      raise ParameterValidationError, "Datatype not supported: '#{self[:datatype]}'"
    end
    nil
  end

  # Raise ParameterValidationError unless the value satisfies the constraint.
  # When no constraint is passed, the parameter's own :constraint is used; nil means 'anything goes'.
  def check_constraint(v, constraint = nil)
    constraint ||= self[:constraint]
    return if constraint.nil?
    unless constraint_checker(v, constraint)
      raise ParameterValidationError, "Value '#{v}' is not allowed (constraint: #{constraint})."
    end
  end

  # Test a value against a constraint.
  # Arrays match if any element (itself treated as a constraint) matches or the Array includes the value;
  # Ranges use cover?, Regexps use =~ and any other constraint is compared with ==.
  def constraint_checker(v, constraint)
    case constraint
    when Array
      constraint.each do |c|
        # errors raised while checking one alternative just mean 'no match' for that alternative
        return true if (constraint_checker(v, c) rescue false)
      end
      return true if constraint.include? v
    when Range
      return true if constraint.cover? v
    when Regexp
      return true if v =~ constraint
    else
      return true if v == constraint
    end
    false
  end

end # Parameter
|
217
|
+
|
218
|
+
# To use the parameters a class should include the ParameterContainer module and add parameter
|
219
|
+
# statements to the body of the class definition.
|
220
|
+
#
|
221
|
+
# Besides enabling the {::Libis::Tools::ParameterContainer::ClassMethods#parameter parameter} class method to
|
222
|
+
# define parameters, the module adds the class method
|
223
|
+
# {::Libis::Tools::ParameterContainer::ClassMethods#parameter_defs parameter_defs} that will return
|
224
|
+
# a Hash with parameter names as keys and their respective parameter definitions as values.
|
225
|
+
#
|
226
|
+
# On each class instance the {::Libis::Tools::ParameterContainer#parameter parameter} method is added and serves
|
227
|
+
# as both getter and setter for parameter values.
|
228
|
+
# The methods {::Libis::Tools::ParameterContainer#[] []} and {::Libis::Tools::ParameterContainer#[]= []=} serve as
|
229
|
+
# aliases for the getter and setter calls.
|
230
|
+
#
|
231
|
+
# Additionally two protected methods are available on the instance:
|
232
|
+
# * {::Libis::Tools::ParameterContainer#parameters parameters}: returns the Hash that keeps track of the current
|
233
|
+
# parameter values for the instance.
|
234
|
+
# * {::Libis::Tools::ParameterContainer#get_parameter_definition get_parameter_definition}: retrieves the parameter
|
235
|
+
# definition from the instance's class for the given parameter name.
|
236
|
+
#
|
237
|
+
# Any class that derives from a class that included the ParameterContainer module will automatically inherit all
|
238
|
+
# parameter definitions from all of its base classes and can override any of these parameter definitions e.g. to
|
239
|
+
# change the default values for the parameter.
|
240
|
+
#
|
241
|
+
module ParameterContainer

  # Methods created on class level.
  module ClassMethods

    # Get a list of all parameter definitions.
    # On first use the list is initialized with duplicates of the parameter definitions of the parent class
    # (when the parent responds to parameter_defs); each new parameter definition updates or appends the list.
    # @return [Hash] with parameter names as keys and {Parameter} instance as value.
    def parameter_defs
      return @parameters if @parameters
      @parameters = ::Concurrent::Hash.new
      # Explicit capability checks instead of rescuing NoMethodError, so genuine errors are not hidden.
      parent = respond_to?(:superclass) ? superclass : nil
      if parent.respond_to?(:parameter_defs)
        parent.parameter_defs.each { |name, param| @parameters[name] = param.dup }
      end
      @parameters
    end

    # DSL method that allows creating parameter definitions on the class level.
    #
    # It takes only one mandatory argument which is a Hash. The first entry is interpreted as '<name>: <default>'.
    # The name for the parameter should be unique and the default value can be any value
    # of type TrueClass, FalseClass, String, Integer, Float, Date, Time, DateTime, Array, Hash or nil.
    # When called with a non-Hash argument, the parameter definition with that name is returned instead.
    #
    # The second up to last Hash entries are optional properties for the parameter. These are:
    # * datatype: the type of values the parameter will accept. Valid values are:
    #   * 'bool' or 'boolean'
    #   * 'string'
    #   * 'int'
    #   * 'float'
    #   * 'datetime'
    #   * 'array'
    #   * 'hash'
    #   Any other value will raise an Exception when the parameter is used. The value is case-insensitive and
    #   if not present, the datatype will be derived from the default value with 'string' being the default for
    #   NilClass. In any case the parameter will try its best to convert supplied values to the proper data type.
    #   For instance, an Integer parameter will accept 3, 3.1415, '3' and Rational(10/3) as valid values and
    #   store them as the integer value 3. Likewise DateTime parameters will try to interpret date and time strings.
    # * description: any descriptive text you want to add to clarify what this parameter is used for.
    #   Any tool can ask the class for its parameters and - for instance - can use this property to provide help
    #   in a GUI when asking the user for input.
    # * constraint: adds a validation condition to the parameter. The condition value can be:
    #   * an array: only values that convert to a value in the list are considered valid.
    #   * a range: only values that convert to a value in the given range are considered valid.
    #   * a regular expression: only values that match the regular expression are considered valid.
    #   * a string: only values that are '==' to the constraint are considered valid.
    # * frozen: if set to true, prevents the class instance to set the parameter to any value other than
    #   the default. Mostly useful when a derived class needs a parameter in the parent class to be set to a
    #   specific value. Setting a value on a frozen parameter with the 'parameter(name,value)' method throws a
    #   {::Libis::Tools::ParameterFrozenError}.
    # * options: a hash with any additional properties that you want to associate to the parameter. Any key-value
    #   pair in this hash is added to the retrievable properties of the parameter. Likewise any property defined,
    #   that is not in the list of known properties is added to the options hash. In this aspect the
    #   ::Libis::Tools::Parameter class behaves much like an OpenStruct even though it is implemented as a Struct.
    #
    # Properties (and the default) whose value is nil are left untouched on an existing definition; false
    # values are applied, so a subclass can override a default with false or reset frozen to false.
    # The Hash passed in is not modified.
    #
    # @param [Hash, Object] options parameter definition, or the name of a parameter to look up
    # @return [Parameter, nil] the parameter definition; nil for an empty Hash or an unknown name
    def parameter(options = {})
      return parameter_defs[options] unless options.is_a?(Hash)
      return nil if options.empty?
      (name, default), *properties = options.to_a
      name = name.to_s.to_sym
      param = (parameter_defs[name] ||= Parameter.new(name, default))
      param[:default] = default unless default.nil?
      properties.each do |key, value|
        # the default always comes from the first entry, never from an explicit :default property
        next if key == :default
        param[key] = value unless value.nil?
      end
      param
    end

  end

  # @!visibility private
  def self.included(base)
    base.extend(ClassMethods)
  end

  # Special constant to indicate a parameter has no value set. Nil cannot be used as it is a valid value.
  # It is recognised by object identity (equal?), not by string content.
  NO_VALUE = '##NAV##'

  # Getter/setter for parameter instances
  # With only one argument (the parameter name) it returns the current value for the parameter, but the optional
  # second argument will cause the method to set the parameter value. If the parameter is not available or
  # the given value is not a valid value for the parameter, the method will return the special constant
  # {::Libis::Tools::ParameterContainer::NO_VALUE NO_VALUE}.
  #
  # The value is stored as given; conversion happens through the definition's parse method on every read.
  #
  # Setting a (valid) value on a frozen parameter with the 'parameter(name,value)' method throws a
  # {::Libis::Tools::ParameterFrozenError} exception.
  def parameter(name, value = NO_VALUE)
    param_def = get_parameter_definition(name)
    return NO_VALUE unless param_def
    # getter: parse the stored value (nil when never set, which yields the default)
    return param_def.parse(parameters[name]) if value.equal?(NO_VALUE)
    # setter
    return NO_VALUE unless param_def.valid_value?(value)
    if param_def[:frozen]
      raise ParameterFrozenError, "Parameter '#{param_def[:name]}' is frozen in '#{self.class.name}'"
    end
    parameters[name] = value
  end

  # Alias for the {#parameter} getter.
  def [](name)
    parameter(name)
  end

  # Alias for the {#parameter} setter.
  # The only difference is that in case of a frozen parameter, this method silently ignores the exception,
  # but the default value still will not be changed.
  def []=(name, value)
    parameter name, value
  rescue ParameterFrozenError
    # ignored
  end

  protected

  # Hash holding the values that were explicitly set on this instance, keyed by parameter name.
  def parameters
    @parameter_values ||= Hash.new
  end

  # Look up the definition for the given parameter name on the instance's class.
  def get_parameter_definition(name)
    self.class.parameter_defs[name]
  end

end # ParameterContainer
|
370
|
+
|
371
|
+
end # Tools
|
372
|
+
end # Libis
|
@@ -0,0 +1,196 @@
|
|
1
|
+
require 'libis/tools/extend/roo'
|
2
|
+
require 'libis/tools/extend/hash'
|
3
|
+
require 'awesome_print'
|
4
|
+
|
5
|
+
module Libis
|
6
|
+
module Tools
|
7
|
+
|
8
|
+
class Spreadsheet

  # Spreadsheet reader.
  #
  # This class supports CSV, Excel 2007-2016, Excel (pre-2007) and LibreOffice/OpenOffice Calc
  # thanks to the Roo (http://github.com/roo-rb/roo) project.
  #
  # The first argument is the file name to read. For spreadsheets, append '|' and the sheet name to specify the
  # sheet to read.
  #
  # The second argument is a Hash with options. The Hash passed in is not modified. The options can be:
  # - required: a list of headers that need to be present. The list can be an Array containing the literal header
  #   values expected. Alternatively, a Hash is also allowed with alternative header names as keys and literal
  #   names as values. If a :headers keys is present in the Hash with a value of true or :first, whatever is on the
  #   first row, will be used as header values, ignoring the rest of the Hash. A key of :header_search with an array
  #   of strings as value will search for a row that contains each of the strings in the given array. Each string is
  #   searched by regular expression, so strings may contain wildcards.
  #   Default is empty array, meaning to use whatever is on the first row as header.
  # - optional: a list of headers that may be present, but are not required. Similar format as above. Default is
  #   empty array.
  # - noheader: a list of headers to force upon the sheet if no headers are present.
  # - extension: :csv, :xlsx, :xlsm, :ods, :xls, :google to help the library in deciding what format the file is in.
  #
  # The following options are collected under the :csv_options key that is handed to Roo:
  # - encoding: the encoding of the CSV file. e.g. 'windows-1252:UTF-8' to convert the input from windows code page
  #   1252 to UTF-8 during file reading. Default is 'UTF-8'.
  # - col_sep: column separator. Default is ',', but can be set to "\t" for TSV files.
  # - quote_char: character for quoting. Default is '"'.
  #
  # @param [String] file_name
  # @param [Hash] opts
  def initialize(file_name, opts = {})
    # Work on a copy so the caller's Hash is never modified.
    opts = opts.dup

    csv_options = { encoding: 'UTF-8', col_sep: ',', quote_char: '"' }
    [:encoding, :col_sep, :quote_char].each do |key|
      csv_options[key] = opts.delete(key) if opts[key]
    end
    options = { csv_options: csv_options }.merge(opts)

    required_headers = options.delete(:required) || []
    optional_headers = options.delete(:optional) || []
    noheader_headers = options.delete(:noheader) || []

    file, sheet = file_name.split('|')
    @ss = ::Roo::Spreadsheet.open(file, options)
    @ss.default_sheet = sheet if sheet

    @header_options = {}

    check_headers(required: required_headers, optional: optional_headers, noheader: noheader_headers)
  end

  # Iterate over sheet content.
  #
  # The options Hash can contain the following keys:
  # - :sheet - overwrites default sheet name
  # - :required - Array or Hash of required headers
  # - :optional - Array or Hash of optional headers
  # - :noheader - Array of noheader headers
  #
  # Each iteration, a Hash will be passed with the key names as specified in the header options and the
  # corresponding cell values.
  #
  # @param [Hash] options
  def each(options = {}, &block)
    @ss.default_sheet = options[:sheet] if options[:sheet]
    @ss.each(check_headers(options), &block)
  end

  # Parse sheet content.
  #
  # The options Hash can contain the following keys:
  # - :sheet - overwrites default sheet name
  # - :required - Array or Hash of required headers
  # - :optional - Array or Hash of optional headers
  # - :noheader - Array of noheader headers
  #
  # An Array will be returned with for each row a Hash with the key names as specified in the header options and the
  # corresponding cell values.
  #
  # @param [Hash] options
  # @return [Array<Hash>]
  def parse(options = {})
    # Same :sheet handling as #each; the options Hash is left untouched.
    @ss.default_sheet = options[:sheet] if options[:sheet]
    @ss.parse(check_headers(options))
  end

  # Return the next row as a Hash (header => cell value) and advance the current_row pointer.
  # @return [Hash, nil] nil when the last row has already been returned
  def shift
    return nil unless @current_row < @ss.last_row
    @current_row += 1
    Hash[@ss.row(@current_row).map.with_index { |v, i| [headers[i], v] }]
  end

  # Set the current_row pointer back to the start (the header line)
  def restart
    @current_row = @ss.header_line
  end

  # Open and iterate over sheet content.
  #
  # @param (see #initialize)
  def self.foreach(file_name, opts = {}, &block)
    new(file_name, opts).each(&block)
  end

  # @return [Array] the header names found on the sheet; empty when the sheet reports no headers
  def headers
    (@ss.headers || {}).keys
  end

  private

  # Validate the sheet headers against the given header options and remember the resulting Roo options.
  #
  # When none of :required, :optional or :noheader is given, the options determined by the previous check are
  # returned unchanged. Otherwise the header options are rebuilt, a header check is forced and the current_row
  # pointer is reset to the header line.
  #
  # @param [Hash] options may contain :required (Array or Hash), :optional (Array or Hash), :noheader (Array)
  # @return [Hash] a copy of the options to pass to the sheet's each/parse
  # @raise [RuntimeError] when an option has the wrong type or the required headers cannot be found
  def check_headers(options = {})
    if options[:required] || options[:optional] || options[:noheader]

      ss_options = {}
      required_headers = normalize_headers(options[:required] || [], 'Required')
      optional_headers = normalize_headers(options[:optional] || [], 'Optional')

      # a missing :noheader option means 'no forced headers', just like in the constructor
      noheader_headers = options[:noheader] || []
      raise RuntimeError, 'Noheader headers should be an Array.' unless noheader_headers.is_a?(Array)

      # if not set, default to both required and optional headers
      noheader_headers = (required_headers.keys + optional_headers.keys) if noheader_headers.empty?

      # force noheader_headers or just use first row
      ss_options[:headers] = noheader_headers.empty? ? :first_row : noheader_headers

      # search for whatever was supplied
      ss_options.merge!(required_headers).merge!(optional_headers)

      # NOTE(review): how :partial_match and :required_headers are interpreted is defined by the Roo
      # extension (libis/tools/extend/roo), which is not visible here.
      ss_options[:partial_match] = true
      ss_options[:required_headers] = required_headers.keys

      # force a header check (may throw exceptions)
      begin
        @ss.each(ss_options.dup) { break }
      rescue Roo::HeaderRowNotFoundError
        raise RuntimeError, missing_headers_message(required_headers)
      rescue Roo::HeaderRowIncompleteError
        if @ss.row(@ss.header_line).compact.empty?
          raise RuntimeError, 'Sheet does not contain enough columns.'
        end
        raise RuntimeError, missing_headers_message(required_headers)
      end

      @current_row = @ss.header_line
      @header_options = ss_options
    end

    @header_options.dup
  end

  # Turn a header specification into a Hash; an Array maps every entry onto itself.
  def normalize_headers(header_spec, label)
    case header_spec
    when Hash
      header_spec
    when Array
      Hash[header_spec.zip(header_spec)]
    else
      raise RuntimeError, "#{label} headers should be either a Hash or an Array."
    end
  end

  # Build the error text listing the required headers that are absent from the sheet.
  def missing_headers_message(required_headers)
    # uses #headers so a sheet without any detected headers does not cause a NoMethodError on nil
    found_headers = required_headers.keys & headers
    "Headers not found: #{required_headers.keys - found_headers}."
  end

end
|
194
|
+
|
195
|
+
end
|
196
|
+
end
|