libis-tools 0.9.44 → 0.9.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/lib/libis/tools/extend/roo.rb +43 -32
- data/lib/libis/tools/spreadsheet.rb +93 -54
- data/lib/libis/tools/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8e5a13d476bc924187b3f55cfc73ee1ca625cc81
|
4
|
+
data.tar.gz: a303e11aac907b4b2872bfd6d29081468d964d37
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8f13fccc9695b41c540c784d9e154a86879c1fcfe0fbec35bf9d1b1a5ea5133ea2409ad199da2cb88af97fbac21bca442ab3263a1ea835e426cb3d6693b7186e
|
7
|
+
data.tar.gz: baa9a0c4a0fa1ca8aa4de0f8c9f7cb23b1fa419fbabdad307b6a748e2b2d20402f7f348622114b92fe5838093c386a9d136ae3b56487f9a6fd2dc4ac961a3bdc
|
data/.travis.yml
CHANGED
@@ -5,7 +5,7 @@ rvm:
|
|
5
5
|
- 2.1.0
|
6
6
|
- 2.2.0
|
7
7
|
- ruby-head
|
8
|
-
- jruby-9.
|
8
|
+
- jruby-9.1.5.0
|
9
9
|
jdk:
|
10
10
|
- openjdk7
|
11
11
|
- oraclejdk7
|
@@ -26,6 +26,7 @@ matrix:
|
|
26
26
|
jdk: oraclejdk8
|
27
27
|
allow_failures:
|
28
28
|
- rvm: ruby-head
|
29
|
+
- rvm: jruby-9.1.5.0
|
29
30
|
branches:
|
30
31
|
only:
|
31
32
|
- master
|
@@ -1,29 +1,43 @@
|
|
1
1
|
require 'roo'
|
2
2
|
require 'roo-xls'
|
3
3
|
require 'roo-google'
|
4
|
+
require 'libis/tools/extend/hash'
|
4
5
|
|
5
6
|
module Roo
|
6
7
|
class HeaderRowIncompleteError < Error;
|
7
8
|
end
|
8
9
|
class Base
|
9
10
|
|
11
|
+
# changes:
|
12
|
+
# - added option :skip_headers to prevent #each and #parse from returning the header row
|
13
|
+
# - added option :partial_match to allow headers that only partially match the query
|
14
|
+
# - added option :required_headers to force the result to have at least these columns
|
15
|
+
# - allow option :headers to contain an array with header labels that will be forced when no header row is found
|
16
|
+
# - improved proper range scanning (first_row->last_row and first_column->last_column)
|
17
|
+
|
18
|
+
attr_accessor :partial_match
|
19
|
+
|
10
20
|
def each(options = {})
|
11
21
|
return to_enum(:each, options) unless block_given?
|
12
22
|
|
23
|
+
skip_headers = options.delete(:skip_headers)
|
24
|
+
@partial_match = options.delete(:partial_match) if options.has_key?(:partial_match)
|
25
|
+
required_headers = options.delete(:required_headers) if options.has_key?(:required_headers)
|
26
|
+
|
13
27
|
if options.empty?
|
14
|
-
|
28
|
+
first_row.upto(last_row) do |line|
|
29
|
+
next if skip_headers && line == header_line
|
15
30
|
yield row(line)
|
16
31
|
end
|
17
32
|
else
|
18
33
|
clean_sheet_if_need(options)
|
19
|
-
search_or_set_header(options)
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end]
|
34
|
+
@headers = search_or_set_header(options)
|
35
|
+
if required_headers
|
36
|
+
raise Roo::HeaderRowIncompleteError unless headers.keys & required_headers == required_headers
|
37
|
+
end
|
24
38
|
|
25
|
-
start_line =
|
26
|
-
start_line
|
39
|
+
start_line = header_line
|
40
|
+
start_line += 1 if skip_headers
|
27
41
|
start_line.upto(last_row) do |line|
|
28
42
|
yield(Hash[headers.map { |k, v| [k, cell(line, v)] }])
|
29
43
|
end
|
@@ -32,54 +46,51 @@ module Roo
|
|
32
46
|
|
33
47
|
private
|
34
48
|
|
35
|
-
def row_with(query
|
36
|
-
line_no =
|
49
|
+
def row_with(query)
|
50
|
+
line_no = first_row
|
37
51
|
each do |row|
|
38
|
-
line_no += 1
|
39
52
|
headers = query.map { |q| row.grep(q)[0] }.compact
|
40
53
|
|
41
54
|
if headers.length == query.length
|
42
55
|
@header_line = line_no
|
43
|
-
return
|
56
|
+
return headers
|
44
57
|
elsif line_no > 100
|
45
58
|
raise Roo::HeaderRowNotFoundError
|
46
59
|
elsif headers.length > 0
|
47
60
|
# partial match
|
48
61
|
@header_line = line_no
|
49
|
-
raise Roo::HeaderRowIncompleteError unless
|
50
|
-
return
|
62
|
+
raise Roo::HeaderRowIncompleteError unless partial_match
|
63
|
+
return headers
|
51
64
|
end
|
65
|
+
line_no += 1
|
52
66
|
end
|
53
67
|
raise Roo::HeaderRowNotFoundError
|
54
68
|
end
|
55
69
|
|
56
70
|
def search_or_set_header(options)
|
71
|
+
force_headers = options.delete(:headers)
|
57
72
|
if options[:header_search]
|
58
|
-
|
59
|
-
|
60
|
-
elsif [:first_row, true].include?(options[:headers])
|
61
|
-
@headers = Hash[row(first_row).map.with_index{ |x, i| [x, i + first_column] }]
|
73
|
+
row_with(options[:header_search])
|
74
|
+
elsif [:first_row, true].include?(force_headers)
|
62
75
|
@header_line = first_row
|
63
76
|
else
|
64
|
-
set_headers(options)
|
77
|
+
return set_headers(options)
|
65
78
|
end
|
79
|
+
return Hash[row(header_line).map { |x| [x, header_index(x)] }]
|
80
|
+
rescue Roo::HeaderRowNotFoundError => e
|
81
|
+
# Not OK unless a list of headers is supplied
|
82
|
+
raise e unless force_headers.is_a?(Array)
|
83
|
+
# Force the headers in the order they are given, but up to the last column
|
84
|
+
@header_line = first_row - 1
|
85
|
+
return Hash[force_headers.zip(first_column..last_column)].cleanup
|
66
86
|
end
|
67
87
|
|
68
|
-
def set_headers(hash
|
88
|
+
def set_headers(hash)
|
69
89
|
# try to find header row with all values or give an error
|
70
90
|
# then create new hash by indexing strings and keeping integers for header array
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
if @options[:force_headers]
|
75
|
-
# Finding headers failed. Force the headers in the order they are given, but up to the last column
|
76
|
-
@headers = {}
|
77
|
-
hash.keys.each.with_index { |k, i| @headers[k] = i + first_column if i + first_column <= last_column }
|
78
|
-
@header_line = first_row
|
79
|
-
@header_line -= 1 unless hash.values.any? { |v| row(1).include? v } # partial match
|
80
|
-
else
|
81
|
-
raise e
|
82
|
-
end
|
91
|
+
row_with(hash.values)
|
92
|
+
positions = Hash[row(header_line).map { |x| [x, header_index(x)] }]
|
93
|
+
Hash[positions.map { |k, v| [hash.invert[k] || k, v] }]
|
83
94
|
end
|
84
95
|
|
85
96
|
end
|
@@ -19,10 +19,13 @@ module Libis
|
|
19
19
|
# - required: a list of headers that need to be present. The list can be an Array containing the literal header
|
20
20
|
# values expected. Alternatively, a Hash is also allowed with alternative header names as keys and literal
|
21
21
|
# names as values. If a :headers key is present in the Hash with a value of true or :first_row, whatever is on the
|
22
|
-
# first row, will be used as header values, ignoring the rest of the Hash. A key of :header_search
|
22
|
+
# first row, will be used as header values, ignoring the rest of the Hash. A key of :header_search with an array
|
23
|
+
# of strings as value will search for a row that contains each of the strings in the given array. Each string is
|
24
|
+
# searched by regular expression, so strings may contain wildcards.
|
23
25
|
# Default is empty array, meaning to use whatever is on the first row as header.
|
24
26
|
# - optional: a list of headers that may be present, but are not required. Similar format as above. Default is
|
25
|
-
# empty array
|
27
|
+
# empty array.
|
28
|
+
# - noheader: a list of headers to force upon the sheet if no headers are present.
|
26
29
|
# - extension: :csv, :xlsx, :xlsm, :ods, :xls, :google to help the library in deciding what format the file is in.
|
27
30
|
#
|
28
31
|
# The following options are only applicable to CSV input files and are ignored otherwise.
|
@@ -31,8 +34,6 @@ module Libis
|
|
31
34
|
# - col_sep: column separator. Default is ',', but can be set to "\t" for TSV files.
|
32
35
|
# - quote_char: character for quoting.
|
33
36
|
#
|
34
|
-
# Resources are created during initialisation and should be freed by calling the #close method.
|
35
|
-
#
|
36
37
|
# @param [String] file_name
|
37
38
|
# @param [Hash] opts
|
38
39
|
def initialize(file_name, opts = {})
|
@@ -45,18 +46,19 @@ module Libis
|
|
45
46
|
col_sep: ',',
|
46
47
|
quote_char: '"',
|
47
48
|
),
|
48
|
-
skip_headers: true,
|
49
|
-
force_headers: true,
|
50
49
|
}.merge(opts)
|
51
50
|
|
52
51
|
required_headers = options.delete(:required) || []
|
53
52
|
optional_headers = options.delete(:optional) || []
|
53
|
+
noheader_headers = options.delete(:noheader) || []
|
54
54
|
|
55
55
|
file, sheet = file_name.split('|')
|
56
56
|
@ss = ::Roo::Spreadsheet.open(file, options)
|
57
57
|
@ss.default_sheet = sheet if sheet
|
58
58
|
|
59
|
-
|
59
|
+
@header_options = {}
|
60
|
+
|
61
|
+
check_headers(required: required_headers, optional: optional_headers, noheader: noheader_headers)
|
60
62
|
|
61
63
|
end
|
62
64
|
|
@@ -66,6 +68,7 @@ module Libis
|
|
66
68
|
# - :sheet - overwrites default sheet name
|
67
69
|
# - :required - Array or Hash of required headers
|
68
70
|
# - :optional - Array or Hash of optional headers
|
71
|
+
# - :noheader - Array of headers to force upon the sheet if no header row is present
|
69
72
|
#
|
70
73
|
# Each iteration, a Hash will be passed with the key names as specified in the header options and the
|
71
74
|
# corresponding cell values.
|
@@ -73,20 +76,39 @@ module Libis
|
|
73
76
|
# @param [Hash] options
|
74
77
|
def each(options = {}, &block)
|
75
78
|
@ss.default_sheet = options[:sheet] if options[:sheet]
|
76
|
-
@ss.each(check_headers(options
|
79
|
+
@ss.each(check_headers(options), &block)
|
77
80
|
end
|
78
81
|
|
82
|
+
# Parse sheet content.
|
83
|
+
#
|
84
|
+
# The options Hash can contain the following keys:
|
85
|
+
# - :sheet - overwrites default sheet name
|
86
|
+
# - :required - Array or Hash of required headers
|
87
|
+
# - :optional - Array or Hash of optional headers
|
88
|
+
# - :noheader - Array of headers to force upon the sheet if no header row is present
|
89
|
+
#
|
90
|
+
# An Array will be returned containing, for each row, a Hash with the key names as specified in the header options and the
|
91
|
+
# corresponding cell values.
|
92
|
+
#
|
93
|
+
# @param [Hash] options
|
94
|
+
# @return [Array<Hash>]
|
79
95
|
def parse(options = {})
|
80
|
-
@ss.default_sheet = options
|
81
|
-
@ss.parse(check_headers(options
|
96
|
+
@ss.default_sheet = options.delete(:sheet) if options.has_key?(:sheet)
|
97
|
+
@ss.parse(check_headers(options))
|
82
98
|
end
|
83
99
|
|
100
|
+
# Return the current row and increment the current_row pointer.
|
84
101
|
def shift
|
85
102
|
return nil unless @current_row < @ss.last_row
|
86
103
|
@current_row += 1
|
87
104
|
Hash[@ss.row(@current_row).map.with_index { |v, i| [headers[i], v] }]
|
88
105
|
end
|
89
106
|
|
107
|
+
# Set the current_row pointer back to the start
|
108
|
+
def restart
|
109
|
+
@current_row = @ss.header_line
|
110
|
+
end
|
111
|
+
|
90
112
|
# Open and iterate over sheet content.
|
91
113
|
#
|
92
114
|
# @param @see #initialize
|
@@ -95,60 +117,77 @@ module Libis
|
|
95
117
|
end
|
96
118
|
|
97
119
|
def headers
|
98
|
-
(@ss.headers || {}).keys
|
120
|
+
(@ss.headers || {}).keys
|
99
121
|
end
|
100
122
|
|
101
123
|
private
|
102
124
|
|
103
|
-
def check_headers(
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
(optional_headers.is_a?(Hash) ? optional_headers.values : optional_headers)
|
125
|
+
def check_headers(options = {})
|
126
|
+
if options[:required] || options[:optional] || options[:noheader]
|
127
|
+
|
128
|
+
# defaults
|
129
|
+
ss_options = {}
|
130
|
+
required_headers = options[:required] || []
|
131
|
+
optional_headers = options[:optional] || []
|
132
|
+
|
133
|
+
# make sure required_headers is a Hash
|
134
|
+
case required_headers
|
135
|
+
when Hash
|
136
|
+
# OK
|
137
|
+
when Array
|
138
|
+
required_headers = Hash[required_headers.zip(required_headers)]
|
139
|
+
else
|
140
|
+
raise RuntimeError, 'Required headers should be either a Hash or an Array.'
|
120
141
|
end
|
121
|
-
else
|
122
|
-
header_options =
|
123
|
-
required_headers.is_a?(Hash) ? required_headers : Hash[required_headers.map { |x| [x] * 2 }]
|
124
|
-
header_options.merge!(
|
125
|
-
optional_headers.is_a?(Hash) ? optional_headers : Hash[optional_headers.map { |x| [x] * 2 }]
|
126
|
-
)
|
127
|
-
end
|
128
142
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
143
|
+
# make sure optional_headers is a Hash
|
144
|
+
case optional_headers
|
145
|
+
when Hash
|
146
|
+
# OK
|
147
|
+
when Array
|
148
|
+
optional_headers = Hash[optional_headers.zip(optional_headers)]
|
149
|
+
else
|
150
|
+
raise RuntimeError, 'Optional headers should be either a Hash or an Array.'
|
151
|
+
end
|
133
152
|
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
if
|
139
|
-
|
153
|
+
# make sure noheader_headers is properly initialized
|
154
|
+
noheader_headers = options[:noheader]
|
155
|
+
raise RuntimeError, 'Noheader headers should be an Array.' unless noheader_headers.is_a?(Array)
|
156
|
+
|
157
|
+
# if not set, default to both required and optional headers
|
158
|
+
noheader_headers = (required_headers.keys + optional_headers.keys) if noheader_headers.empty?
|
159
|
+
|
160
|
+
# force noheader_headers or just use first row
|
161
|
+
ss_options[:headers] = noheader_headers.empty? ? :first_row : noheader_headers
|
162
|
+
|
163
|
+
# search for whatever was supplied
|
164
|
+
ss_options.merge!(required_headers).merge!(optional_headers)
|
165
|
+
|
166
|
+
# allow partial match for only required headers
|
167
|
+
ss_options[:partial_match] = true
|
168
|
+
ss_options[:required_headers] = required_headers.keys
|
169
|
+
|
170
|
+
# force a header check (may throw exceptions)
|
171
|
+
begin
|
172
|
+
@ss.each(ss_options.dup) { break }
|
173
|
+
rescue Roo::HeaderRowNotFoundError
|
174
|
+
found_headers = required_headers.keys & @ss.headers.keys
|
175
|
+
raise RuntimeError, "Headers not found: #{required_headers.keys - found_headers}."
|
176
|
+
rescue Roo::HeaderRowIncompleteError
|
177
|
+
if @ss.row(@ss.header_line).compact.empty?
|
178
|
+
raise RuntimeError, 'Sheet does not contain enough columns.'
|
179
|
+
else
|
180
|
+
found_headers = required_headers.keys & @ss.headers.keys
|
181
|
+
raise RuntimeError, "Headers not found: #{required_headers.keys - found_headers}."
|
182
|
+
|
183
|
+
end
|
140
184
|
end
|
141
|
-
elsif found_headers.size < required_headers.size
|
142
|
-
# Some, but not all headers found
|
143
|
-
raise RuntimeError, "Headers not found: #{required_headers - found_headers}."
|
144
|
-
else
|
145
|
-
# All required headers found
|
146
|
-
end
|
147
185
|
|
148
|
-
|
149
|
-
|
186
|
+
@current_row = @ss.header_line
|
187
|
+
@header_options = ss_options.merge(skip_headers: true)
|
188
|
+
end
|
150
189
|
|
151
|
-
@header_options
|
190
|
+
@header_options.dup
|
152
191
|
end
|
153
192
|
|
154
193
|
end
|
data/lib/libis/tools/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libis-tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.45
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kris Dekeyser
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-11-
|
11
|
+
date: 2016-11-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|