libis-tools 1.0.5-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.gitignore +16 -0
- data/.rspec +2 -0
- data/.travis.yml +40 -0
- data/Gemfile +7 -0
- data/README.md +202 -0
- data/Rakefile +11 -0
- data/bin/libis_tool +5 -0
- data/lib/libis-tools.rb +1 -0
- data/lib/libis/tools.rb +25 -0
- data/lib/libis/tools/assert.rb +52 -0
- data/lib/libis/tools/checksum.rb +106 -0
- data/lib/libis/tools/cli/cli_helper.rb +189 -0
- data/lib/libis/tools/cli/reorg.rb +416 -0
- data/lib/libis/tools/command.rb +133 -0
- data/lib/libis/tools/command_line.rb +23 -0
- data/lib/libis/tools/config.rb +147 -0
- data/lib/libis/tools/config_file.rb +85 -0
- data/lib/libis/tools/csv.rb +38 -0
- data/lib/libis/tools/deep_struct.rb +71 -0
- data/lib/libis/tools/extend/array.rb +16 -0
- data/lib/libis/tools/extend/empty.rb +7 -0
- data/lib/libis/tools/extend/hash.rb +147 -0
- data/lib/libis/tools/extend/kernel.rb +25 -0
- data/lib/libis/tools/extend/ostruct.rb +3 -0
- data/lib/libis/tools/extend/roo.rb +91 -0
- data/lib/libis/tools/extend/string.rb +94 -0
- data/lib/libis/tools/extend/struct.rb +29 -0
- data/lib/libis/tools/extend/symbol.rb +8 -0
- data/lib/libis/tools/logger.rb +130 -0
- data/lib/libis/tools/mets_dnx.rb +61 -0
- data/lib/libis/tools/mets_file.rb +504 -0
- data/lib/libis/tools/mets_objects.rb +547 -0
- data/lib/libis/tools/parameter.rb +372 -0
- data/lib/libis/tools/spreadsheet.rb +196 -0
- data/lib/libis/tools/temp_file.rb +42 -0
- data/lib/libis/tools/thread_safe.rb +31 -0
- data/lib/libis/tools/version.rb +5 -0
- data/lib/libis/tools/xml_document.rb +583 -0
- data/libis-tools.gemspec +55 -0
- data/spec/assert_spec.rb +65 -0
- data/spec/checksum_spec.rb +68 -0
- data/spec/command_spec.rb +90 -0
- data/spec/config_file_spec.rb +83 -0
- data/spec/config_spec.rb +113 -0
- data/spec/csv_spec.rb +159 -0
- data/spec/data/test-headers.csv +2 -0
- data/spec/data/test-headers.tsv +2 -0
- data/spec/data/test-noheaders.csv +1 -0
- data/spec/data/test-noheaders.tsv +1 -0
- data/spec/data/test.data +9 -0
- data/spec/data/test.xlsx +0 -0
- data/spec/data/test.xml +8 -0
- data/spec/data/test.yml +2 -0
- data/spec/data/test_config.yml +15 -0
- data/spec/deep_struct_spec.rb +138 -0
- data/spec/logger_spec.rb +165 -0
- data/spec/mets_file_spec.rb +223 -0
- data/spec/parameter_container_spec.rb +152 -0
- data/spec/parameter_spec.rb +148 -0
- data/spec/spec_helper.rb +29 -0
- data/spec/spreadsheet_spec.rb +1820 -0
- data/spec/temp_file_spec.rb +76 -0
- data/spec/test.xsd +20 -0
- data/spec/thread_safe_spec.rb +64 -0
- data/spec/xmldocument_spec.rb +421 -0
- data/test/test_helper.rb +7 -0
- data/test/webservices/test_ca_item_info.rb +59 -0
- data/test/webservices/test_ca_search.rb +35 -0
- metadata +437 -0
@@ -0,0 +1,372 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'date'
|
3
|
+
require 'libis/tools/extend/struct'
|
4
|
+
require 'concurrent/hash'
|
5
|
+
|
6
|
+
module Libis
|
7
|
+
module Tools
|
8
|
+
|
9
|
+
# Exception that will be raised when a parameter value does not pass the validation checks.
|
10
|
+
# Signals that a value was rejected by a parameter's datatype conversion or constraint check.
class ParameterValidationError < RuntimeError; end
|
12
|
+
|
13
|
+
# Exception that will be raised when an attempt is made to change the value of a frozen parameter.
|
14
|
+
# Signals an attempt to assign a value to a parameter whose definition is marked as frozen.
class ParameterFrozenError < RuntimeError; end
|
16
|
+
|
17
|
+
# noinspection RubyConstantNamingConvention
|
18
|
+
|
19
|
+
# A {Parameter} is like a class instance attribute on steroids. Contrary to regular attributes, {Parameter}s are
|
20
|
+
# type-safe, can have a descriptive text explaining their use, a constraint that limits the values and any other
|
21
|
+
# properties for an application to use for their needs.
|
22
|
+
#
|
23
|
+
# Parameters are inherited from base classes and can be overwritten without affecting the parameters in the parent
|
24
|
+
# class. For instance, a regular parameter in the parent class can be given a fixed value in the child class by
|
25
|
+
# giving it a default value and setting its frozen property to true. The same parameter in the parent class
|
26
|
+
# instances will still be modifiable. But the parameter in the child class instances will be frozen, even if
|
27
|
+
# accessed via the methods on parent class.
|
28
|
+
#
|
29
|
+
# Important: the parameter will exist both on the class level as on the instance level, but the parameter on the
|
30
|
+
# class level is the parameter definition as described in the {Parameter} class. On the instance level, there are
|
31
|
+
# merely some parameter methods that access the parameter instance values with the help of the parameter definitions
|
32
|
+
# on the class. The implementation of the parameter instances is dealt with by the {ParameterContainer} module.
|
33
|
+
class Parameter < Struct.new(:name, :default, :datatype, :description, :constraint, :frozen, :options)

  # Create a Parameter instance.
  # @param [Array] args The values, in this order, for:
  #   * name - Required. String for the name of the parameter. Any valid attribute name is acceptable.
  #   * default value - Any value. Default is nil.
  #   * datatype - String. One of: bool, string, int, float, datetime, array, hash. If omitted it will be derived
  #     from the default value or set to the default 'string'.
  #   * description - String describing the parameter's use.
  #   * constraint - Array, Range, RegEx or single value. Default is nil meaning no constraint.
  #   * frozen - Boolean. Default is false; if true the parameter value cannot be changed from the default value.
  #   * options - Any Hash. It's up to the application to interpret and use this info.
  def initialize(*args)
    super(*args)
    self[:options] ||= {}
    self[:datatype] ||= guess_datatype
  end

  # Duplicates the parameter. The options Hash is deep-copied so that changes to the
  # copy's options do not leak into the original.
  # @return [Parameter] the copy
  def dup
    new_obj = super
    # noinspection RubyResolve
    new_obj[:options] = Marshal.load(Marshal.dump(self[:options]))
    new_obj
  end

  # Merges other parameter data into the current parameter.
  # Every regular property of +other+ is copied over the current one; the options of
  # +other+ are merged into the current options Hash.
  # @param [::Libis::Tools::Parameter] other parameter definition to copy properties from
  # @return [Parameter] self
  def merge!(other)
    # Struct#each only yields the values; each_pair yields member name and value.
    other.each_pair do |k, v|
      if k == :options
        self[:options].merge!(v) if v
      else
        self[k] = v
      end
    end
    self
  end

  # Retrieve a specific property of the parameter.
  # If not found in the regular properties, the options Hash is scanned for the property.
  # @param [Symbol] key name of the property
  # @return [Object] the property value, nil if not known
  def [](key)
    return super(key) if members.include?(key)
    self[:options][key]
  end

  # Set a property of the parameter.
  # If the property is not one of the regular properties, the property will be set in the options Hash.
  # @param [Symbol] key name of the property
  # @param [Object] value value for the property. No type checking happens on this value
  def []=(key, value)
    return super(key, value) if members.include?(key)
    self[:options][key] = value
  end

  # Convenience method to create a new {Parameter} from a Hash.
  # Keys may be Strings or Symbols. Keys that match a regular property are used as such, all
  # other keys end up in the options Hash. The input Hash (and an :options Hash inside it)
  # is left untouched.
  # @param [Hash] h Hash with parameter definition properties
  # @return [Parameter] the new parameter definition
  def self.from_hash(h)
    props = h.each_with_object({}) { |(k, v), acc| acc[k.to_s.to_sym] = v }
    # extra properties are written into the options Hash below; do not share it with the caller
    props[:options] = props[:options].dup if props[:options].is_a?(Hash)
    # going through the constructor makes sure the datatype is derived from the default value
    param = new(*members.map { |member| props.delete(member) })
    props.each { |k, v| param[k] = v }
    param
  end

  # Dumps the parameter properties into a Hash.
  # The options properties are merged into the hash. If you do not want that, use Struct#to_h instead.
  #
  # @return [Hash] parameter definition properties
  def to_h
    super.each_with_object({}) do |(key, value), hash|
      if key == :options
        hash.merge!(value) if value
      else
        hash[key] = value
      end
    end
  end

  # Valid input strings for boolean parameter value, all converted to 'true'
  TRUE_BOOL = %w'true yes t y 1'
  # Valid input strings for boolean parameter value, all converted to 'false'
  FALSE_BOOL = %w'false no f n 0'

  # Parse any value and try to convert to the correct datatype and check the constraints.
  # A nil value yields the (unconverted) default value.
  # @param [Object] value Any value to parse, strings are best supported.
  # @return [Object] checked and converted value
  # @raise [ParameterValidationError] if the value cannot be interpreted or violates the constraint
  def parse(value = nil)
    result = value.nil? ? self[:default] : convert(value)
    check_constraint(result)
    result
  end

  # Parse any value and try to convert to the correct datatype and check the constraints.
  # @param [Object] value Any value to check
  # @return [Boolean] false if {#parse} raises any StandardError for the value, true otherwise
  def valid_value?(value)
    parse(value)
    true
  rescue StandardError
    false
  end

  private

  # Derive the datatype name from the class of the default value, unless a datatype is already set.
  def guess_datatype
    self[:datatype] || case self[:default]
                       when TrueClass, FalseClass
                         'bool'
                       when NilClass
                         'string'
                       when Integer
                         'int'
                       when Float
                         'float'
                       when DateTime, Date, Time
                         'datetime'
                       when Array
                         'array'
                       when Hash
                         'hash'
                       else
                         self[:default].class.name.downcase
                       end
  end

  # Convert the value to the parameter's datatype.
  # Returns nil for 'array' and 'hash' datatypes when the value cannot be converted.
  def convert(v)
    case self[:datatype].to_s.downcase
    when 'boolean', 'bool'
      text = v.to_s.downcase
      return true if TRUE_BOOL.include?(text)
      return false if FALSE_BOOL.include?(text)
      raise ParameterValidationError, "No boolean information in '#{v.to_s}'. " +
          "Valid values are: '#{TRUE_BOOL.join('\', \'')}" +
          "' and '#{FALSE_BOOL.join('\', \'')}'."
    when 'string', 'nil'
      return v.to_s
    when 'int'
      return Integer(v)
    when 'float'
      return Float(v)
    when 'datetime'
      return v.to_datetime if v.respond_to? :to_datetime
      return DateTime.parse(v)
    when 'array'
      return v if v.is_a?(Array)
      return v.split(/[,;|\s]+/) if v.is_a?(String)
      # Alternatively:
      # return JSON.parse(v) if v.is_a?(String)
      return v.to_a if v.respond_to?(:to_a)
    when 'hash'
      return v if v.is_a?(Hash)
      return Hash[(0...v.size).zip(v)] if v.is_a?(Array)
      if v.is_a?(String)
        # this file does not require json itself; load it on demand
        require 'json'
        return JSON.parse(v)
      end
    else
      raise ParameterValidationError, "Datatype not supported: '#{self[:datatype]}'"
    end
    nil
  end

  # Raise a ParameterValidationError if the value does not satisfy the constraint.
  # Without an explicit constraint argument, the parameter's own constraint is used.
  def check_constraint(v, constraint = nil)
    constraint ||= self[:constraint]
    return if constraint.nil?
    unless constraint_checker(v, constraint)
      raise ParameterValidationError, "Value '#{v}' is not allowed (constraint: #{constraint})."
    end
  end

  # Test the value against a constraint. An Array constraint is satisfied when any of its
  # elements (each treated as a constraint) is satisfied or when the value is in the Array.
  # @return [Boolean]
  def constraint_checker(v, constraint)
    case constraint
    when Array
      constraint.each do |c|
        matched = begin
          constraint_checker(v, c)
        rescue StandardError
          false
        end
        return true if matched
      end
      return true if constraint.include? v
    when Range
      return true if constraint.cover? v
    when Regexp
      # Object#=~ no longer exists on recent Rubies; values that cannot be matched simply do not match
      return true if v.respond_to?(:=~) && v =~ constraint
    else
      return true if v == constraint
    end
    false
  end

end # Parameter
|
217
|
+
|
218
|
+
# To use the parameters a class should include the ParameterContainer module and add parameter
|
219
|
+
# statements to the body of the class definition.
|
220
|
+
#
|
221
|
+
# Besides enabling the {::Libis::Tools::ParameterContainer::ClassMethods#parameter parameter} class method to
|
222
|
+
# define parameters, the module adds the class method
|
223
|
+
# {::Libis::Tools::ParameterContainer::ClassMethods#parameter_defs parameter_defs} that will return
|
224
|
+
# a Hash with parameter names as keys and their respective parameter definitions as values.
|
225
|
+
#
|
226
|
+
# On each class instance the {::Libis::Tools::ParameterContainer#parameter parameter} method is added and serves
|
227
|
+
# as both getter and setter for parameter values.
|
228
|
+
# The methods {::Libis::Tools::ParameterContainer#[] []} and {::Libis::Tools::ParameterContainer#[]= []=} serve as
|
229
|
+
# aliases for the getter and setter calls.
|
230
|
+
#
|
231
|
+
# Additionally two protected methods are available on the instance:
|
232
|
+
# * {::Libis::Tools::ParameterContainer#parameters parameters}: returns the Hash that keeps track of the current
|
233
|
+
# parameter values for the instance.
|
234
|
+
# * {::Libis::Tools::ParameterContainer#get_parameter_definition get_parameter_definition}: retrieves the parameter
|
235
|
+
# definition from the instance's class for the given parameter name.
|
236
|
+
#
|
237
|
+
# Any class that derives from a class that included the ParameterContainer module will automatically inherit all
|
238
|
+
# parameter definitions from all of its base classes and can override any of these parameter definitions e.g. to
|
239
|
+
# change the default values for the parameter.
|
240
|
+
#
|
241
|
+
module ParameterContainer

  # Methods created on class level.
  module ClassMethods

    # Get a list of all parameter definitions.
    # The list is initialized with duplicates of the parameter definitions of the parent class and
    # each new parameter definition updates or appends the list.
    # @return [Hash] with parameter names as keys and {Parameter} instance as value.
    def parameter_defs
      return @parameters if @parameters
      @parameters = ::Concurrent::Hash.new
      # Only inherit when the parent really supports parameter definitions. An explicit check is
      # used instead of rescuing NoMethodError, which would also hide unrelated errors.
      parent = respond_to?(:superclass) ? superclass : nil
      if parent.respond_to?(:parameter_defs)
        parent.parameter_defs.each { |name, param| @parameters[name] = param.dup }
      end
      @parameters
    end

    # DSL method that allows creating parameter definitions on the class level.
    #
    # It takes only one mandatory argument which is a Hash. The first entry is interpreted as '<name>: <default>'.
    # The name for the parameter should be unique and the default value can be any value
    # of type TrueClass, FalseClass, String, Integer, Float, Date, Time, DateTime, Array, Hash or nil.
    #
    # The second up to last Hash entries are optional properties for the parameter. These are:
    # * datatype: the type of values the parameter will accept. Valid values are:
    #   * 'bool' or 'boolean'
    #   * 'string'
    #   * 'int'
    #   * 'float'
    #   * 'datetime'
    #   * 'array'
    #   * 'hash'
    #   Any other value will raise an Exception when the parameter is used. The value is case-insensitive and
    #   if not present, the datatype will be derived from the default value with 'string' being the default for
    #   NilClass. In any case the parameter will try its best to convert supplied values to the proper data type.
    #   For instance, an Integer parameter will accept 3, 3.1415, '3' and Rational(10/3) as valid values and
    #   store them as the integer value 3. Likewise DateTime parameters will try to interpret date and time strings.
    # * description: any descriptive text you want to add to clarify what this parameter is used for.
    #   Any tool can ask the class for its parameters and - for instance - can use this property to provide help
    #   in a GUI when asking the user for input.
    # * constraint: adds a validation condition to the parameter. The condition value can be:
    #   * an array: only values that convert to a value in the list are considered valid.
    #   * a range: only values that convert to a value in the given range are considered valid.
    #   * a regular expression: only values that match the regular expression are considered valid.
    #   * a string: only values that are '==' to the constraint are considered valid.
    # * frozen: if set to true, prevents the class instance to set the parameter to any value other than
    #   the default. Mostly useful when a derived class needs a parameter in the parent class to be set to a
    #   specific value. Setting a value on a frozen parameter with the 'parameter(name,value)' method throws a
    #   {::Libis::Tools::ParameterFrozenError}.
    # * options: a hash with any additional properties that you want to associate to the parameter. Any key-value pair in this
    #   hash is added to the retrievable properties of the parameter. Likewise any property defined, that is not in the list of
    #   known properties is added to the options hash. In this aspect the ::Libis::Tools::Parameter class behaves much like an
    #   OpenStruct even though it is implemented as a Struct.
    #
    # Properties (including the default) with a nil value are treated as 'not specified' and leave an
    # already defined (inherited) property untouched. A false value is applied, so a derived class
    # can e.g. change a default to false or unfreeze a parameter.
    #
    # When called with anything but a Hash, the argument is used as the parameter name and the
    # matching parameter definition (or nil) is returned.
    #
    # @param [Hash, Object] options parameter definition or the name of a parameter to look up
    # @return [Parameter, nil] the parameter definition
    def parameter(options = {})
      return self.parameter_defs[options] unless options.is_a? Hash
      return nil if options.keys.empty?
      # work on a copy: the caller's Hash should not lose its first entry
      properties = options.dup
      param_def = properties.shift
      name = param_def.first.to_s.to_sym
      default = param_def.last
      param = (self.parameter_defs[name] ||= Parameter.new(name, default))
      properties[:default] = default
      # only nil means 'not specified'; false is a legitimate property value
      properties.each { |key, value| param[key] = value unless value.nil? }
      param
    end

  end

  # @!visibility private
  def self.included(base)
    base.extend(ClassMethods)
  end

  # Special constant to indicate a parameter has no value set. Nil cannot be used as it is a valid value.
  # The constant is compared by object identity.
  NO_VALUE = '##NAV##'

  # Getter/setter for parameter instances
  # With only one argument (the parameter name) it returns the current value for the parameter, but the optional
  # second argument will cause the method to set the parameter value. If the parameter is not available or
  # the given value is not a valid value for the parameter, the method will return the special constant
  # {::Libis::Tools::ParameterContainer::NO_VALUE NO_VALUE}.
  #
  # Setting a value on a frozen parameter with the 'parameter(name,value)' method throws a
  # {::Libis::Tools::ParameterFrozenError} exception.
  #
  # @param [Object] name name of the parameter as used in the parameter definitions
  # @param [Object] value new value for the parameter; omit to read the current value
  # @return [Object] the parsed current value (getter), the stored value (setter) or NO_VALUE
  # @raise [ParameterFrozenError] when setting a valid value on a frozen parameter
  def parameter(name, value = NO_VALUE)
    param_def = get_parameter_definition(name)
    return NO_VALUE unless param_def
    if value.equal? NO_VALUE
      # the raw value is stored; conversion and constraint check happen on every read
      param_value = parameters[name]
      param_def.parse(param_value)
    else
      return NO_VALUE unless param_def.valid_value?(value)
      if param_def[:frozen]
        raise ParameterFrozenError, "Parameter '#{param_def[:name]}' is frozen in '#{self.class.name}'"
      end
      parameters[name] = value
    end
  end

  # Alias for the {#parameter} getter.
  def [](name)
    parameter(name)
  end

  # Alias for the {#parameter} setter.
  # The only difference is that in case of a frozen parameter, this method silently ignores the exception,
  # but the default value still will not be changed.
  def []=(name, value)
    parameter name, value
  rescue ParameterFrozenError
    # ignored
  end

  protected

  # Hash with the raw parameter values that were set on this instance.
  def parameters
    @parameter_values ||= Hash.new
  end

  # Look up the parameter definition with the given name on the instance's class.
  def get_parameter_definition(name)
    self.class.parameter_defs[name]
  end

end # ParameterContainer
|
370
|
+
|
371
|
+
end # Tools
|
372
|
+
end # Libis
|
@@ -0,0 +1,196 @@
|
|
1
|
+
require 'libis/tools/extend/roo'
|
2
|
+
require 'libis/tools/extend/hash'
|
3
|
+
require 'awesome_print'
|
4
|
+
|
5
|
+
module Libis
|
6
|
+
module Tools
|
7
|
+
|
8
|
+
class Spreadsheet

  # Spreadsheet reader.
  #
  # This class supports CSV, Excel 2007-2016, Excel (pre-2007) and LibreOffice/OpenOffice Calc
  # thanks to the Roo (http://github.com/roo-rb/roo) project.
  #
  # The first argument is the file name to read. For spreadsheets, append '|' and the sheet name to specify the
  # sheet to read.
  #
  # The second argument is a Hash with options. The options can be:
  # - required: a list of headers that need to be present. The list can be an Array containing the literal header
  #   values expected. Alternatively, a Hash is also allowed with alternative header names as keys and literal
  #   names as values. If a :headers keys is present in the Hash with a value of true or :first, whatever is on the
  #   first row, will be used as header values, ignoring the rest of the Hash. A key of :header_search with an array
  #   of strings as value will search for a row that contains each of the strings in the given array. Each string is
  #   searched by regular expression, so strings may contain wildcards.
  #   Default is empty array, meaning to use whatever is on the first row as header.
  # - optional: a list of headers that may be present, but are not required. Similar format as above. Default is
  #   empty array.
  # - noheader: a list of headers to force upon the sheet if no headers are present.
  # - extension: :csv, :xlsx, :xlsm, :ods, :xls, :google to help the library in deciding what format the file is in.
  #
  # The following options are only applicable to CSV input files and are ignored otherwise.
  # - encoding: the encoding of the CSV file. e.g. 'windows-1252:UTF-8' to convert the input from windows code page
  #   1252 to UTF-8 during file reading
  # - col_sep: column separator. Default is ',', but can be set to "\t" for TSV files.
  # - quote_char: character for quoting.
  #
  # The opts Hash itself is not modified.
  #
  # @param [String] file_name
  # @param [Hash] opts
  def initialize(file_name, opts = {})
    # work on a copy; the CSV settings are moved out of it into :csv_options
    opts = opts.dup
    csv_overrides = [:encoding, :col_sep, :quote_char].each_with_object({}) do |key, acc|
      acc[key] = opts.delete(key) if opts[key]
    end
    options = {
        csv_options: {
            encoding: 'UTF-8',
            col_sep: ',',
            quote_char: '"',
        }.merge(csv_overrides)
    }.merge(opts)

    required_headers = options.delete(:required) || []
    optional_headers = options.delete(:optional) || []
    noheader_headers = options.delete(:noheader) || []

    file, sheet = file_name.split('|')
    @ss = ::Roo::Spreadsheet.open(file, options)
    @ss.default_sheet = sheet if sheet

    @header_options = {}

    check_headers(required: required_headers, optional: optional_headers, noheader: noheader_headers)
  end

  # Iterate over sheet content.
  #
  # The options Hash can contain the following keys:
  # - :sheet - overwrites default sheet name
  # - :required - Array or Hash of required headers
  # - :optional - Array or Hash of optional headers
  # - :noheader - Array of noheader headers
  #
  # Each iteration, a Hash will be passed with the key names as specified in the header options and the
  # corresponding cell values.
  #
  # @param [Hash] options
  def each(options = {}, &block)
    @ss.default_sheet = options[:sheet] if options[:sheet]
    @ss.each(check_headers(options), &block)
  end

  # Parse sheet content.
  #
  # The options Hash can contain the following keys:
  # - :sheet - overwrites default sheet name
  # - :required - Array or Hash of required headers
  # - :optional - Array or Hash of optional headers
  # - :noheader - Array of noheader headers
  #
  # An Array will be returned with for each row a Hash with the key names as specified in the header options and the
  # corresponding cell values.
  #
  # @param [Hash] options
  # @return [Array<Hash>]
  def parse(options = {})
    # read :sheet without removing it from the caller's Hash; check_headers does not look at it
    @ss.default_sheet = options[:sheet] if options.has_key?(:sheet)
    @ss.parse(check_headers(options))
  end

  # Return the current row and increment the current_row pointer.
  # @return [Hash, nil] header names mapped on the cell values, nil when past the last row
  def shift
    return nil unless @current_row < @ss.last_row
    @current_row += 1
    Hash[@ss.row(@current_row).map.with_index { |v, i| [headers[i], v] }]
  end

  # Set the current_row pointer back to the start
  def restart
    @current_row = @ss.header_line
  end

  # Open and iterate over sheet content.
  #
  # @param (see #initialize)
  def self.foreach(file_name, opts = {}, &block)
    Libis::Tools::Spreadsheet.new(file_name, opts).each(&block)
  end

  # @return [Array] the header names known to the underlying sheet, empty if none
  def headers
    (@ss.headers || {}).keys
  end

  private

  # Validate the header options against the sheet and remember the resulting Roo options.
  #
  # When none of :required, :optional or :noheader is supplied, the options of the previous
  # check are reused.
  #
  # @param [Hash] options header options (:required, :optional, :noheader)
  # @return [Hash] copy of the options to pass to the Roo each/parse calls
  # @raise [RuntimeError] on malformed header options or when required headers are missing
  def check_headers(options = {})
    if options[:required] || options[:optional] || options[:noheader]

      ss_options = {}
      required_headers = header_hash(options[:required] || [], 'Required')
      optional_headers = header_hash(options[:optional] || [], 'Optional')

      # :noheader may be omitted when only :required and/or :optional are supplied
      noheader_headers = options[:noheader] || []
      raise RuntimeError, 'Noheader headers should be an Array.' unless noheader_headers.is_a?(Array)

      # if not set, default to both required and optional headers
      noheader_headers = (required_headers.keys + optional_headers.keys) if noheader_headers.empty?

      # force noheader_headers or just use first row
      ss_options[:headers] = noheader_headers.empty? ? :first_row : noheader_headers

      # search for whatever was supplied
      ss_options.merge!(required_headers).merge!(optional_headers)

      # allow partial match for only required headers
      ss_options[:partial_match] = true
      ss_options[:required_headers] = required_headers.keys

      # force a header check (may throw exceptions)
      begin
        @ss.each(ss_options.dup) { break }
      rescue Roo::HeaderRowNotFoundError
        raise_missing_headers(required_headers)
      rescue Roo::HeaderRowIncompleteError
        if @ss.row(@ss.header_line).compact.empty?
          raise RuntimeError, 'Sheet does not contain enough columns.'
        else
          raise_missing_headers(required_headers)
        end
      end

      @current_row = @ss.header_line
      @header_options = ss_options
    end

    @header_options.dup
  end

  # Normalize a header specification to a Hash; an Array is mapped onto itself.
  def header_hash(header_spec, kind)
    case header_spec
    when Hash
      header_spec
    when Array
      Hash[header_spec.zip(header_spec)]
    else
      raise RuntimeError, "#{kind} headers should be either a Hash or an Array."
    end
  end

  # Raise an error listing the required headers that the sheet does not have.
  def raise_missing_headers(required_headers)
    raise RuntimeError, "Headers not found: #{required_headers.keys - headers}."
  end

end
|
194
|
+
|
195
|
+
end
|
196
|
+
end
|