libis-tools 0.9.44 → 0.9.45
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/lib/libis/tools/extend/roo.rb +43 -32
- data/lib/libis/tools/spreadsheet.rb +93 -54
- data/lib/libis/tools/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8e5a13d476bc924187b3f55cfc73ee1ca625cc81
|
4
|
+
data.tar.gz: a303e11aac907b4b2872bfd6d29081468d964d37
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8f13fccc9695b41c540c784d9e154a86879c1fcfe0fbec35bf9d1b1a5ea5133ea2409ad199da2cb88af97fbac21bca442ab3263a1ea835e426cb3d6693b7186e
|
7
|
+
data.tar.gz: baa9a0c4a0fa1ca8aa4de0f8c9f7cb23b1fa419fbabdad307b6a748e2b2d20402f7f348622114b92fe5838093c386a9d136ae3b56487f9a6fd2dc4ac961a3bdc
|
data/.travis.yml
CHANGED
@@ -5,7 +5,7 @@ rvm:
|
|
5
5
|
- 2.1.0
|
6
6
|
- 2.2.0
|
7
7
|
- ruby-head
|
8
|
-
- jruby-9.
|
8
|
+
- jruby-9.1.5.0
|
9
9
|
jdk:
|
10
10
|
- openjdk7
|
11
11
|
- oraclejdk7
|
@@ -26,6 +26,7 @@ matrix:
|
|
26
26
|
jdk: oraclejdk8
|
27
27
|
allow_failures:
|
28
28
|
- rvm: ruby-head
|
29
|
+
- rvm: jruby-9.1.5.0
|
29
30
|
branches:
|
30
31
|
only:
|
31
32
|
- master
|
@@ -1,29 +1,43 @@
|
|
1
1
|
require 'roo'
|
2
2
|
require 'roo-xls'
|
3
3
|
require 'roo-google'
|
4
|
+
require 'libis/tools/extend/hash'
|
4
5
|
|
5
6
|
module Roo
|
6
7
|
class HeaderRowIncompleteError < Error;
|
7
8
|
end
|
8
9
|
class Base
|
9
10
|
|
11
|
+
# changes:
|
12
|
+
# - added option :skip_headers to prevent #each and #parse from returning the header row
|
13
|
+
# - added option :partial_match to allow the use of headers that only partially match the query
|
14
|
+
# - added option :required_headers to force the result to have at least these columns
|
15
|
+
# - allow option :headers to contain an array with header labels that will be forced when no header row is found
|
16
|
+
# - improved proper range scanning (first_row->last_row and first_column->last_column)
|
17
|
+
|
18
|
+
attr_accessor :partial_match
|
19
|
+
|
10
20
|
def each(options = {})
|
11
21
|
return to_enum(:each, options) unless block_given?
|
12
22
|
|
23
|
+
skip_headers = options.delete(:skip_headers)
|
24
|
+
@partial_match = options.delete(:partial_match) if options.has_key?(:partial_match)
|
25
|
+
required_headers = options.delete(:required_headers) if options.has_key?(:required_headers)
|
26
|
+
|
13
27
|
if options.empty?
|
14
|
-
|
28
|
+
first_row.upto(last_row) do |line|
|
29
|
+
next if skip_headers && line == header_line
|
15
30
|
yield row(line)
|
16
31
|
end
|
17
32
|
else
|
18
33
|
clean_sheet_if_need(options)
|
19
|
-
search_or_set_header(options)
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
end]
|
34
|
+
@headers = search_or_set_header(options)
|
35
|
+
if required_headers
|
36
|
+
raise Roo::HeaderRowIncompleteError unless headers.keys & required_headers == required_headers
|
37
|
+
end
|
24
38
|
|
25
|
-
start_line =
|
26
|
-
start_line
|
39
|
+
start_line = header_line
|
40
|
+
start_line += 1 if skip_headers
|
27
41
|
start_line.upto(last_row) do |line|
|
28
42
|
yield(Hash[headers.map { |k, v| [k, cell(line, v)] }])
|
29
43
|
end
|
@@ -32,54 +46,51 @@ module Roo
|
|
32
46
|
|
33
47
|
private
|
34
48
|
|
35
|
-
def row_with(query
|
36
|
-
line_no =
|
49
|
+
def row_with(query)
|
50
|
+
line_no = first_row
|
37
51
|
each do |row|
|
38
|
-
line_no += 1
|
39
52
|
headers = query.map { |q| row.grep(q)[0] }.compact
|
40
53
|
|
41
54
|
if headers.length == query.length
|
42
55
|
@header_line = line_no
|
43
|
-
return
|
56
|
+
return headers
|
44
57
|
elsif line_no > 100
|
45
58
|
raise Roo::HeaderRowNotFoundError
|
46
59
|
elsif headers.length > 0
|
47
60
|
# partial match
|
48
61
|
@header_line = line_no
|
49
|
-
raise Roo::HeaderRowIncompleteError unless
|
50
|
-
return
|
62
|
+
raise Roo::HeaderRowIncompleteError unless partial_match
|
63
|
+
return headers
|
51
64
|
end
|
65
|
+
line_no += 1
|
52
66
|
end
|
53
67
|
raise Roo::HeaderRowNotFoundError
|
54
68
|
end
|
55
69
|
|
56
70
|
def search_or_set_header(options)
|
71
|
+
force_headers = options.delete(:headers)
|
57
72
|
if options[:header_search]
|
58
|
-
|
59
|
-
|
60
|
-
elsif [:first_row, true].include?(options[:headers])
|
61
|
-
@headers = Hash[row(first_row).map.with_index{ |x, i| [x, i + first_column] }]
|
73
|
+
row_with(options[:header_search])
|
74
|
+
elsif [:first_row, true].include?(force_headers)
|
62
75
|
@header_line = first_row
|
63
76
|
else
|
64
|
-
set_headers(options)
|
77
|
+
return set_headers(options)
|
65
78
|
end
|
79
|
+
return Hash[row(header_line).map { |x| [x, header_index(x)] }]
|
80
|
+
rescue Roo::HeaderRowNotFoundError => e
|
81
|
+
# Not OK unless a list of headers is supplied
|
82
|
+
raise e unless force_headers.is_a?(Array)
|
83
|
+
# Force the headers in the order they are given, but up to the last column
|
84
|
+
@header_line = first_row - 1
|
85
|
+
return Hash[force_headers.zip(first_column..last_column)].cleanup
|
66
86
|
end
|
67
87
|
|
68
|
-
def set_headers(hash
|
88
|
+
def set_headers(hash)
|
69
89
|
# try to find header row with all values or give an error
|
70
90
|
# then create new hash by indexing strings and keeping integers for header array
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
if @options[:force_headers]
|
75
|
-
# Finding headers failed. Force the headers in the order they are given, but up to the last column
|
76
|
-
@headers = {}
|
77
|
-
hash.keys.each.with_index { |k, i| @headers[k] = i + first_column if i + first_column <= last_column }
|
78
|
-
@header_line = first_row
|
79
|
-
@header_line -= 1 unless hash.values.any? { |v| row(1).include? v } # partial match
|
80
|
-
else
|
81
|
-
raise e
|
82
|
-
end
|
91
|
+
row_with(hash.values)
|
92
|
+
positions = Hash[row(header_line).map { |x| [x, header_index(x)] }]
|
93
|
+
Hash[positions.map { |k, v| [hash.invert[k] || k, v] }]
|
83
94
|
end
|
84
95
|
|
85
96
|
end
|
@@ -19,10 +19,13 @@ module Libis
|
|
19
19
|
# - required: a list of headers that need to be present. The list can be an Array containing the literal header
|
20
20
|
# values expected. Alternatively, a Hash is also allowed with alternative header names as keys and literal
|
21
21
|
# names as values. If a :headers key is present in the Hash with a value of true or :first, whatever is on the
|
22
|
-
# first row, will be used as header values, ignoring the rest of the Hash. A key of :header_search
|
22
|
+
# first row, will be used as header values, ignoring the rest of the Hash. A key of :header_search with an array
|
23
|
+
# of strings as value will search for a row that contains each of the strings in the given array. Each string is
|
24
|
+
# searched by regular expression, so strings may contain wildcards.
|
23
25
|
# Default is empty array, meaning to use whatever is on the first row as header.
|
24
26
|
# - optional: a list of headers that may be present, but are not required. Similar format as above. Default is
|
25
|
-
# empty array
|
27
|
+
# empty array.
|
28
|
+
# - noheader: a list of headers to force upon the sheet if no headers are present.
|
26
29
|
# - extension: :csv, :xlsx, :xlsm, :ods, :xls, :google to help the library in deciding what format the file is in.
|
27
30
|
#
|
28
31
|
# The following options are only applicable to CSV input files and are ignored otherwise.
|
@@ -31,8 +34,6 @@ module Libis
|
|
31
34
|
# - col_sep: column separator. Default is ',', but can be set to "\t" for TSV files.
|
32
35
|
# - quote_char: character for quoting.
|
33
36
|
#
|
34
|
-
# Resources are created during initialisation and should be freed by calling the #close method.
|
35
|
-
#
|
36
37
|
# @param [String] file_name
|
37
38
|
# @param [Hash] opts
|
38
39
|
def initialize(file_name, opts = {})
|
@@ -45,18 +46,19 @@ module Libis
|
|
45
46
|
col_sep: ',',
|
46
47
|
quote_char: '"',
|
47
48
|
),
|
48
|
-
skip_headers: true,
|
49
|
-
force_headers: true,
|
50
49
|
}.merge(opts)
|
51
50
|
|
52
51
|
required_headers = options.delete(:required) || []
|
53
52
|
optional_headers = options.delete(:optional) || []
|
53
|
+
noheader_headers = options.delete(:noheader) || []
|
54
54
|
|
55
55
|
file, sheet = file_name.split('|')
|
56
56
|
@ss = ::Roo::Spreadsheet.open(file, options)
|
57
57
|
@ss.default_sheet = sheet if sheet
|
58
58
|
|
59
|
-
|
59
|
+
@header_options = {}
|
60
|
+
|
61
|
+
check_headers(required: required_headers, optional: optional_headers, noheader: noheader_headers)
|
60
62
|
|
61
63
|
end
|
62
64
|
|
@@ -66,6 +68,7 @@ module Libis
|
|
66
68
|
# - :sheet - overwrites default sheet name
|
67
69
|
# - :required - Array or Hash of required headers
|
68
70
|
# - :optional - Array or Hash of optional headers
|
71
|
+
# - :noheader - Array of noheader headers
|
69
72
|
#
|
70
73
|
# Each iteration, a Hash will be passed with the key names as specified in the header options and the
|
71
74
|
# corresponding cell values.
|
@@ -73,20 +76,39 @@ module Libis
|
|
73
76
|
# @param [Hash] options
|
74
77
|
def each(options = {}, &block)
|
75
78
|
@ss.default_sheet = options[:sheet] if options[:sheet]
|
76
|
-
@ss.each(check_headers(options
|
79
|
+
@ss.each(check_headers(options), &block)
|
77
80
|
end
|
78
81
|
|
82
|
+
# Parse sheet content.
|
83
|
+
#
|
84
|
+
# The options Hash can contain the following keys:
|
85
|
+
# - :sheet - overwrites default sheet name
|
86
|
+
# - :required - Array or Hash of required headers
|
87
|
+
# - :optional - Array or Hash of optional headers
|
88
|
+
# - :noheader - Array of noheader headers
|
89
|
+
#
|
90
|
+
# An Array will be returned containing, for each row, a Hash with the key names as specified in the header options and the
|
91
|
+
# corresponding cell values.
|
92
|
+
#
|
93
|
+
# @param [Hash] options
|
94
|
+
# @return [Array<Hash>]
|
79
95
|
def parse(options = {})
|
80
|
-
@ss.default_sheet = options
|
81
|
-
@ss.parse(check_headers(options
|
96
|
+
@ss.default_sheet = options.delete(:sheet) if options.has_key?(:sheet)
|
97
|
+
@ss.parse(check_headers(options))
|
82
98
|
end
|
83
99
|
|
100
|
+
# Return the current row and increment the current_row pointer.
|
84
101
|
def shift
|
85
102
|
return nil unless @current_row < @ss.last_row
|
86
103
|
@current_row += 1
|
87
104
|
Hash[@ss.row(@current_row).map.with_index { |v, i| [headers[i], v] }]
|
88
105
|
end
|
89
106
|
|
107
|
+
# Set the current_row pointer back to the start
|
108
|
+
def restart
|
109
|
+
@current_row = @ss.header_line
|
110
|
+
end
|
111
|
+
|
90
112
|
# Open and iterate over sheet content.
|
91
113
|
#
|
92
114
|
# @param @see #initialize
|
@@ -95,60 +117,77 @@ module Libis
|
|
95
117
|
end
|
96
118
|
|
97
119
|
def headers
|
98
|
-
(@ss.headers || {}).keys
|
120
|
+
(@ss.headers || {}).keys
|
99
121
|
end
|
100
122
|
|
101
123
|
private
|
102
124
|
|
103
|
-
def check_headers(
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
(optional_headers.is_a?(Hash) ? optional_headers.values : optional_headers)
|
125
|
+
def check_headers(options = {})
|
126
|
+
if options[:required] || options[:optional] || options[:noheader]
|
127
|
+
|
128
|
+
# defaults
|
129
|
+
ss_options = {}
|
130
|
+
required_headers = options[:required] || []
|
131
|
+
optional_headers = options[:optional] || []
|
132
|
+
|
133
|
+
# make sure required_headers is a Hash
|
134
|
+
case required_headers
|
135
|
+
when Hash
|
136
|
+
# OK
|
137
|
+
when Array
|
138
|
+
required_headers = Hash[required_headers.zip(required_headers)]
|
139
|
+
else
|
140
|
+
raise RuntimeError, 'Required headers should be either a Hash or an Array.'
|
120
141
|
end
|
121
|
-
else
|
122
|
-
header_options =
|
123
|
-
required_headers.is_a?(Hash) ? required_headers : Hash[required_headers.map { |x| [x] * 2 }]
|
124
|
-
header_options.merge!(
|
125
|
-
optional_headers.is_a?(Hash) ? optional_headers : Hash[optional_headers.map { |x| [x] * 2 }]
|
126
|
-
)
|
127
|
-
end
|
128
142
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
143
|
+
# make sure optional_headers is a Hash
|
144
|
+
case optional_headers
|
145
|
+
when Hash
|
146
|
+
# OK
|
147
|
+
when Array
|
148
|
+
optional_headers = Hash[optional_headers.zip(optional_headers)]
|
149
|
+
else
|
150
|
+
raise RuntimeError, 'Optional headers should be either a Hash or an Array.'
|
151
|
+
end
|
133
152
|
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
if
|
139
|
-
|
153
|
+
# make sure noheader_headers is properly initialized
|
154
|
+
noheader_headers = options[:noheader]
|
155
|
+
raise RuntimeError, 'Noheader headers should be an Array.' unless noheader_headers.is_a?(Array)
|
156
|
+
|
157
|
+
# if not set, default to both required and optional headers
|
158
|
+
noheader_headers = (required_headers.keys + optional_headers.keys) if noheader_headers.empty?
|
159
|
+
|
160
|
+
# force noheader_headers or just use first row
|
161
|
+
ss_options[:headers] = noheader_headers.empty? ? :first_row : noheader_headers
|
162
|
+
|
163
|
+
# search for whatever was supplied
|
164
|
+
ss_options.merge!(required_headers).merge!(optional_headers)
|
165
|
+
|
166
|
+
# allow partial match for only required headers
|
167
|
+
ss_options[:partial_match] = true
|
168
|
+
ss_options[:required_headers] = required_headers.keys
|
169
|
+
|
170
|
+
# force a header check (may throw exceptions)
|
171
|
+
begin
|
172
|
+
@ss.each(ss_options.dup) { break }
|
173
|
+
rescue Roo::HeaderRowNotFoundError
|
174
|
+
found_headers = required_headers.keys & @ss.headers.keys
|
175
|
+
raise RuntimeError, "Headers not found: #{required_headers.keys - found_headers}."
|
176
|
+
rescue Roo::HeaderRowIncompleteError
|
177
|
+
if @ss.row(@ss.header_line).compact.empty?
|
178
|
+
raise RuntimeError, 'Sheet does not contain enough columns.'
|
179
|
+
else
|
180
|
+
found_headers = required_headers.keys & @ss.headers.keys
|
181
|
+
raise RuntimeError, "Headers not found: #{required_headers.keys - found_headers}."
|
182
|
+
|
183
|
+
end
|
140
184
|
end
|
141
|
-
elsif found_headers.size < required_headers.size
|
142
|
-
# Some, but not all headers found
|
143
|
-
raise RuntimeError, "Headers not found: #{required_headers - found_headers}."
|
144
|
-
else
|
145
|
-
# All required headers found
|
146
|
-
end
|
147
185
|
|
148
|
-
|
149
|
-
|
186
|
+
@current_row = @ss.header_line
|
187
|
+
@header_options = ss_options.merge(skip_headers: true)
|
188
|
+
end
|
150
189
|
|
151
|
-
@header_options
|
190
|
+
@header_options.dup
|
152
191
|
end
|
153
192
|
|
154
193
|
end
|
data/lib/libis/tools/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libis-tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.
|
4
|
+
version: 0.9.45
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kris Dekeyser
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-11-
|
11
|
+
date: 2016-11-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|