libis-tools 0.9.44 → 0.9.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9f31ca736bcd249250f63da7beb8077f62884d13
4
- data.tar.gz: 39082c83a169c8d1327d04b7abf0cc4be359c5e9
3
+ metadata.gz: 8e5a13d476bc924187b3f55cfc73ee1ca625cc81
4
+ data.tar.gz: a303e11aac907b4b2872bfd6d29081468d964d37
5
5
  SHA512:
6
- metadata.gz: 50863594daeb15513f04c45362499b48530b68d44a5488632a23eeb1624cbe1a9822ea007559fe76aab3561cfcfd51ec1259971aed8c096786389e33a2c7b98a
7
- data.tar.gz: 148b11b3ed23b84b99175642ea662d0f43e0c1d3c797cbfd889b99440c66e51935975332f4fc41d707722bc4936f87d421bef131fbeeebdcf61165ea0ea113e3
6
+ metadata.gz: 8f13fccc9695b41c540c784d9e154a86879c1fcfe0fbec35bf9d1b1a5ea5133ea2409ad199da2cb88af97fbac21bca442ab3263a1ea835e426cb3d6693b7186e
7
+ data.tar.gz: baa9a0c4a0fa1ca8aa4de0f8c9f7cb23b1fa419fbabdad307b6a748e2b2d20402f7f348622114b92fe5838093c386a9d136ae3b56487f9a6fd2dc4ac961a3bdc
data/.travis.yml CHANGED
@@ -5,7 +5,7 @@ rvm:
5
5
  - 2.1.0
6
6
  - 2.2.0
7
7
  - ruby-head
8
- - jruby-9.0.1.0
8
+ - jruby-9.1.5.0
9
9
  jdk:
10
10
  - openjdk7
11
11
  - oraclejdk7
@@ -26,6 +26,7 @@ matrix:
26
26
  jdk: oraclejdk8
27
27
  allow_failures:
28
28
  - rvm: ruby-head
29
+ - rvm: jruby-9.1.5.0
29
30
  branches:
30
31
  only:
31
32
  - master
@@ -1,29 +1,43 @@
1
1
  require 'roo'
2
2
  require 'roo-xls'
3
3
  require 'roo-google'
4
+ require 'libis/tools/extend/hash'
4
5
 
5
6
  module Roo
6
7
  class HeaderRowIncompleteError < Error;
7
8
  end
8
9
  class Base
9
10
 
11
+ # changes:
12
+ # - added option :skip_headers to prevent #each and #parse from returning the header row
13
+ # - added option :partial_match to allow the use of headers that only partially match the query
14
+ # - added option :required_headers to force the result to have at least these columns
15
+ # - allow option :headers to contain an array with header labels that will be forced when no header row is found
16
+ # - improved proper range scanning (first_row->last_row and first_column->last_column)
17
+
18
+ attr_accessor :partial_match
19
+
10
20
  def each(options = {})
11
21
  return to_enum(:each, options) unless block_given?
12
22
 
23
+ skip_headers = options.delete(:skip_headers)
24
+ @partial_match = options.delete(:partial_match) if options.has_key?(:partial_match)
25
+ required_headers = options.delete(:required_headers) if options.has_key?(:required_headers)
26
+
13
27
  if options.empty?
14
- 1.upto(last_row) do |line|
28
+ first_row.upto(last_row) do |line|
29
+ next if skip_headers && line == header_line
15
30
  yield row(line)
16
31
  end
17
32
  else
18
33
  clean_sheet_if_need(options)
19
- search_or_set_header(options)
20
- headers = @headers ||
21
- Hash[(first_column..last_column).map do |col|
22
- [cell(@header_line, col), col]
23
- end]
34
+ @headers = search_or_set_header(options)
35
+ if required_headers
36
+ raise Roo::HeaderRowIncompleteError unless headers.keys & required_headers == required_headers
37
+ end
24
38
 
25
- start_line = @header_line || 1
26
- start_line = (@header_line || 0) + 1 if @options[:skip_headers]
39
+ start_line = header_line
40
+ start_line += 1 if skip_headers
27
41
  start_line.upto(last_row) do |line|
28
42
  yield(Hash[headers.map { |k, v| [k, cell(line, v)] }])
29
43
  end
@@ -32,54 +46,51 @@ module Roo
32
46
 
33
47
  private
34
48
 
35
- def row_with(query, return_headers = false)
36
- line_no = 0
49
+ def row_with(query)
50
+ line_no = first_row
37
51
  each do |row|
38
- line_no += 1
39
52
  headers = query.map { |q| row.grep(q)[0] }.compact
40
53
 
41
54
  if headers.length == query.length
42
55
  @header_line = line_no
43
- return return_headers ? headers : line_no
56
+ return headers
44
57
  elsif line_no > 100
45
58
  raise Roo::HeaderRowNotFoundError
46
59
  elsif headers.length > 0
47
60
  # partial match
48
61
  @header_line = line_no
49
- raise Roo::HeaderRowIncompleteError unless @options[:force_headers]
50
- return return_headers ? headers : line_no
62
+ raise Roo::HeaderRowIncompleteError unless partial_match
63
+ return headers
51
64
  end
65
+ line_no += 1
52
66
  end
53
67
  raise Roo::HeaderRowNotFoundError
54
68
  end
55
69
 
56
70
  def search_or_set_header(options)
71
+ force_headers = options.delete(:headers)
57
72
  if options[:header_search]
58
- @headers = nil
59
- @header_line = row_with(options[:header_search])
60
- elsif [:first_row, true].include?(options[:headers])
61
- @headers = Hash[row(first_row).map.with_index{ |x, i| [x, i + first_column] }]
73
+ row_with(options[:header_search])
74
+ elsif [:first_row, true].include?(force_headers)
62
75
  @header_line = first_row
63
76
  else
64
- set_headers(options)
77
+ return set_headers(options)
65
78
  end
79
+ return Hash[row(header_line).map { |x| [x, header_index(x)] }]
80
+ rescue Roo::HeaderRowNotFoundError => e
81
+ # Not OK unless a list of headers is supplied
82
+ raise e unless force_headers.is_a?(Array)
83
+ # Force the headers in the order they are given, but up to the last column
84
+ @header_line = first_row - 1
85
+ return Hash[force_headers.zip(first_column..last_column)].cleanup
66
86
  end
67
87
 
68
- def set_headers(hash = {})
88
+ def set_headers(hash)
69
89
  # try to find header row with all values or give an error
70
90
  # then create new hash by indexing strings and keeping integers for header array
71
- @headers = row_with(hash.values, true)
72
- @headers = Hash[hash.keys.zip(@headers.map { |x| header_index(x) })]
73
- rescue Roo::HeaderRowNotFoundError => e
74
- if @options[:force_headers]
75
- # Finding headers failed. Force the headers in the order they are given, but up to the last column
76
- @headers = {}
77
- hash.keys.each.with_index { |k, i| @headers[k] = i + first_column if i + first_column <= last_column }
78
- @header_line = first_row
79
- @header_line -= 1 unless hash.values.any? { |v| row(1).include? v } # partial match
80
- else
81
- raise e
82
- end
91
+ row_with(hash.values)
92
+ positions = Hash[row(header_line).map { |x| [x, header_index(x)] }]
93
+ Hash[positions.map { |k, v| [hash.invert[k] || k, v] }]
83
94
  end
84
95
 
85
96
  end
@@ -19,10 +19,13 @@ module Libis
19
19
  # - required: a list of headers that need to be present. The list can be an Array containing the literal header
20
20
  # values expected. Alternatively, a Hash is also allowed with alternative header names as keys and literal
21
21
  # names as values. If a :headers key is present in the Hash with a value of true or :first, whatever is on the
22
- # first row, will be used as header values, ignoring the rest of the Hash. A key of :header_search
22
+ # first row, will be used as header values, ignoring the rest of the Hash. A key of :header_search with an array
23
+ # of strings as value will search for a row that contains each of the strings in the given array. Each string is
24
+ # searched by regular expression, so strings may contain wildcards.
23
25
  # Default is empty array, meaning to use whatever is on the first row as header.
24
26
  # - optional: a list of headers that may be present, but are not required. Similar format as above. Default is
25
- # empty array
27
+ # empty array.
28
+ # - noheader: a list of headers to force upon the sheet if no headers are present.
26
29
  # - extension: :csv, :xlsx, :xlsm, :ods, :xls, :google to help the library in deciding what format the file is in.
27
30
  #
28
31
  # The following options are only applicable to CSV input files and are ignored otherwise.
@@ -31,8 +34,6 @@ module Libis
31
34
  # - col_sep: column separator. Default is ',', but can be set to "\t" for TSV files.
32
35
  # - quote_char: character for quoting.
33
36
  #
34
- # Resources are created during initialisation and should be freed by calling the #close method.
35
- #
36
37
  # @param [String] file_name
37
38
  # @param [Hash] opts
38
39
  def initialize(file_name, opts = {})
@@ -45,18 +46,19 @@ module Libis
45
46
  col_sep: ',',
46
47
  quote_char: '"',
47
48
  ),
48
- skip_headers: true,
49
- force_headers: true,
50
49
  }.merge(opts)
51
50
 
52
51
  required_headers = options.delete(:required) || []
53
52
  optional_headers = options.delete(:optional) || []
53
+ noheader_headers = options.delete(:noheader) || []
54
54
 
55
55
  file, sheet = file_name.split('|')
56
56
  @ss = ::Roo::Spreadsheet.open(file, options)
57
57
  @ss.default_sheet = sheet if sheet
58
58
 
59
- check_headers(required_headers, optional_headers)
59
+ @header_options = {}
60
+
61
+ check_headers(required: required_headers, optional: optional_headers, noheader: noheader_headers)
60
62
 
61
63
  end
62
64
 
@@ -66,6 +68,7 @@ module Libis
66
68
  # - :sheet - overwrites default sheet name
67
69
  # - :required - Array or Hash of required headers
68
70
  # - :optional - Array or Hash of optional headers
71
+ # - :noheader - Array of headers to force upon the sheet if no headers are present
69
72
  #
70
73
  # Each iteration, a Hash will be passed with the key names as specified in the header options and the
71
74
  # corresponding cell values.
@@ -73,20 +76,39 @@ module Libis
73
76
  # @param [Hash] options
74
77
  def each(options = {}, &block)
75
78
  @ss.default_sheet = options[:sheet] if options[:sheet]
76
- @ss.each(check_headers(options[:required], options[:optional]), &block)
79
+ @ss.each(check_headers(options), &block)
77
80
  end
78
81
 
82
+ # Parse sheet content.
83
+ #
84
+ # The options Hash can contain the following keys:
85
+ # - :sheet - overwrites default sheet name
86
+ # - :required - Array or Hash of required headers
87
+ # - :optional - Array or Hash of optional headers
88
+ # - :noheader - Array of headers to force upon the sheet if no headers are present
89
+ #
90
+ # An Array will be returned with for each row a Hash with the key names as specified in the header options and the
91
+ # corresponding cell values.
92
+ #
93
+ # @param [Hash] options
94
+ # @return [Array<Hash>]
79
95
  def parse(options = {})
80
- @ss.default_sheet = options[:sheet] if options[:sheet]
81
- @ss.parse(check_headers(options[:required], options[:optional]))
96
+ @ss.default_sheet = options.delete(:sheet) if options.has_key?(:sheet)
97
+ @ss.parse(check_headers(options))
82
98
  end
83
99
 
100
+ # Return the current row and increment the current_row pointer.
84
101
  def shift
85
102
  return nil unless @current_row < @ss.last_row
86
103
  @current_row += 1
87
104
  Hash[@ss.row(@current_row).map.with_index { |v, i| [headers[i], v] }]
88
105
  end
89
106
 
107
+ # Set the current_row pointer back to the start
108
+ def restart
109
+ @current_row = @ss.header_line
110
+ end
111
+
90
112
  # Open and iterate over sheet content.
91
113
  #
92
114
  # @param @see #initialize
@@ -95,60 +117,77 @@ module Libis
95
117
  end
96
118
 
97
119
  def headers
98
- (@ss.headers || {}).keys + @extra_headers
120
+ (@ss.headers || {}).keys
99
121
  end
100
122
 
101
123
  private
102
124
 
103
- def check_headers(required_headers, optional_headers)
104
- return @header_options unless required_headers || optional_headers
105
- header_options = {}
106
- required_headers ||= []
107
- optional_headers ||= []
108
- unless required_headers.is_a?(Hash) || required_headers.is_a?(Array)
109
- raise RuntimeError, 'Required headers should be either a Hash or an Array.'
110
- end
111
- unless optional_headers.is_a?(Hash) || optional_headers.is_a?(Array)
112
- raise RuntimeError, 'Optional headers should be either a Hash or an Array.'
113
- end
114
- if required_headers.empty?
115
- if optional_headers.empty?
116
- header_options[:headers] = :first_row
117
- else
118
- header_options[:header_search] =
119
- (optional_headers.is_a?(Hash) ? optional_headers.values : optional_headers)
125
+ def check_headers(options = {})
126
+ if options[:required] || options[:optional] || options[:noheader]
127
+
128
+ # defaults
129
+ ss_options = {}
130
+ required_headers = options[:required] || []
131
+ optional_headers = options[:optional] || []
132
+
133
+ # make sure required_headers is a Hash
134
+ case required_headers
135
+ when Hash
136
+ # OK
137
+ when Array
138
+ required_headers = Hash[required_headers.zip(required_headers)]
139
+ else
140
+ raise RuntimeError, 'Required headers should be either a Hash or an Array.'
120
141
  end
121
- else
122
- header_options =
123
- required_headers.is_a?(Hash) ? required_headers : Hash[required_headers.map { |x| [x] * 2 }]
124
- header_options.merge!(
125
- optional_headers.is_a?(Hash) ? optional_headers : Hash[optional_headers.map { |x| [x] * 2 }]
126
- )
127
- end
128
142
 
129
- required_headers = required_headers.values if required_headers.is_a?(Hash)
130
-
131
- @ss.each(header_options) { break }
132
- @current_row = @ss.header_line
143
+ # make sure optional_headers is a Hash
144
+ case optional_headers
145
+ when Hash
146
+ # OK
147
+ when Array
148
+ optional_headers = Hash[optional_headers.zip(optional_headers)]
149
+ else
150
+ raise RuntimeError, 'Optional headers should be either a Hash or an Array.'
151
+ end
133
152
 
134
- # checks
135
- found_headers = required_headers & @ss.row([@current_row, 1].max)
136
- if found_headers.empty?
137
- # No headers found - check if there are enough columns to satisfy the required headers
138
- if required_headers.size > (@ss.last_column - @ss.first_column) + 1
139
- raise RuntimeError, 'Sheet does not contain enough columns.'
153
+ # make sure noheader_headers is properly initialized
154
+ noheader_headers = options[:noheader]
155
+ raise RuntimeError, 'Noheader headers should be an Array.' unless noheader_headers.is_a?(Array)
156
+
157
+ # if not set, default to both required and optional headers
158
+ noheader_headers = (required_headers.keys + optional_headers.keys) if noheader_headers.empty?
159
+
160
+ # force noheader_headers or just use first row
161
+ ss_options[:headers] = noheader_headers.empty? ? :first_row : noheader_headers
162
+
163
+ # search for whatever was supplied
164
+ ss_options.merge!(required_headers).merge!(optional_headers)
165
+
166
+ # allow partial match for only required headers
167
+ ss_options[:partial_match] = true
168
+ ss_options[:required_headers] = required_headers.keys
169
+
170
+ # force a header check (may throw exceptions)
171
+ begin
172
+ @ss.each(ss_options.dup) { break }
173
+ rescue Roo::HeaderRowNotFoundError
174
+ found_headers = required_headers.keys & @ss.headers.keys
175
+ raise RuntimeError, "Headers not found: #{required_headers.keys - found_headers}."
176
+ rescue Roo::HeaderRowIncompleteError
177
+ if @ss.row(@ss.header_line).compact.empty?
178
+ raise RuntimeError, 'Sheet does not contain enough columns.'
179
+ else
180
+ found_headers = required_headers.keys & @ss.headers.keys
181
+ raise RuntimeError, "Headers not found: #{required_headers.keys - found_headers}."
182
+
183
+ end
140
184
  end
141
- elsif found_headers.size < required_headers.size
142
- # Some, but not all headers found
143
- raise RuntimeError, "Headers not found: #{required_headers - found_headers}."
144
- else
145
- # All required headers found
146
- end
147
185
 
148
- @extra_headers = (required_headers.empty? && optional_headers.empty?) ? [] :
149
- @ss.row(@ss.header_line).keep_if { |x| x && !header_options.values.include?(x) }
186
+ @current_row = @ss.header_line
187
+ @header_options = ss_options.merge(skip_headers: true)
188
+ end
150
189
 
151
- @header_options = header_options.merge(Hash[@extra_headers.map { |v| [v] * 2 }])
190
+ @header_options.dup
152
191
  end
153
192
 
154
193
  end
@@ -1,5 +1,5 @@
1
1
  module Libis
2
2
  module Tools
3
- VERSION = '0.9.44'
3
+ VERSION = '0.9.45'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: libis-tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.44
4
+ version: 0.9.45
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kris Dekeyser
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-22 00:00:00.000000000 Z
11
+ date: 2016-11-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler