libis-tools 0.9.44 → 0.9.45

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9f31ca736bcd249250f63da7beb8077f62884d13
4
- data.tar.gz: 39082c83a169c8d1327d04b7abf0cc4be359c5e9
3
+ metadata.gz: 8e5a13d476bc924187b3f55cfc73ee1ca625cc81
4
+ data.tar.gz: a303e11aac907b4b2872bfd6d29081468d964d37
5
5
  SHA512:
6
- metadata.gz: 50863594daeb15513f04c45362499b48530b68d44a5488632a23eeb1624cbe1a9822ea007559fe76aab3561cfcfd51ec1259971aed8c096786389e33a2c7b98a
7
- data.tar.gz: 148b11b3ed23b84b99175642ea662d0f43e0c1d3c797cbfd889b99440c66e51935975332f4fc41d707722bc4936f87d421bef131fbeeebdcf61165ea0ea113e3
6
+ metadata.gz: 8f13fccc9695b41c540c784d9e154a86879c1fcfe0fbec35bf9d1b1a5ea5133ea2409ad199da2cb88af97fbac21bca442ab3263a1ea835e426cb3d6693b7186e
7
+ data.tar.gz: baa9a0c4a0fa1ca8aa4de0f8c9f7cb23b1fa419fbabdad307b6a748e2b2d20402f7f348622114b92fe5838093c386a9d136ae3b56487f9a6fd2dc4ac961a3bdc
data/.travis.yml CHANGED
@@ -5,7 +5,7 @@ rvm:
5
5
  - 2.1.0
6
6
  - 2.2.0
7
7
  - ruby-head
8
- - jruby-9.0.1.0
8
+ - jruby-9.1.5.0
9
9
  jdk:
10
10
  - openjdk7
11
11
  - oraclejdk7
@@ -26,6 +26,7 @@ matrix:
26
26
  jdk: oraclejdk8
27
27
  allow_failures:
28
28
  - rvm: ruby-head
29
+ - rvm: jruby-9.1.5.0
29
30
  branches:
30
31
  only:
31
32
  - master
@@ -1,29 +1,43 @@
1
1
  require 'roo'
2
2
  require 'roo-xls'
3
3
  require 'roo-google'
4
+ require 'libis/tools/extend/hash'
4
5
 
5
6
  module Roo
6
7
  class HeaderRowIncompleteError < Error;
7
8
  end
8
9
  class Base
9
10
 
11
+ # changes:
12
+ # - added option :skip_headers to prevent #each and #parse from returning the header row
13
+ # - added option :partial_match to allow to use headers that only partially match the query
14
+ # - added option :required_headers to force the result to have at least these columns
15
+ # - allow option :headers to contain an array with header labels that will be forced when no header row is found
16
+ # - improved proper range scanning (first_row->last_row and first_column->last_column)
17
+
18
+ attr_accessor :partial_match
19
+
10
20
  def each(options = {})
11
21
  return to_enum(:each, options) unless block_given?
12
22
 
23
+ skip_headers = options.delete(:skip_headers)
24
+ @partial_match = options.delete(:partial_match) if options.has_key?(:partial_match)
25
+ required_headers = options.delete(:required_headers) if options.has_key?(:required_headers)
26
+
13
27
  if options.empty?
14
- 1.upto(last_row) do |line|
28
+ first_row.upto(last_row) do |line|
29
+ next if skip_headers && line == header_line
15
30
  yield row(line)
16
31
  end
17
32
  else
18
33
  clean_sheet_if_need(options)
19
- search_or_set_header(options)
20
- headers = @headers ||
21
- Hash[(first_column..last_column).map do |col|
22
- [cell(@header_line, col), col]
23
- end]
34
+ @headers = search_or_set_header(options)
35
+ if required_headers
36
+ raise Roo::HeaderRowIncompleteError unless headers.keys & required_headers == required_headers
37
+ end
24
38
 
25
- start_line = @header_line || 1
26
- start_line = (@header_line || 0) + 1 if @options[:skip_headers]
39
+ start_line = header_line
40
+ start_line += 1 if skip_headers
27
41
  start_line.upto(last_row) do |line|
28
42
  yield(Hash[headers.map { |k, v| [k, cell(line, v)] }])
29
43
  end
@@ -32,54 +46,51 @@ module Roo
32
46
 
33
47
  private
34
48
 
35
- def row_with(query, return_headers = false)
36
- line_no = 0
49
+ def row_with(query)
50
+ line_no = first_row
37
51
  each do |row|
38
- line_no += 1
39
52
  headers = query.map { |q| row.grep(q)[0] }.compact
40
53
 
41
54
  if headers.length == query.length
42
55
  @header_line = line_no
43
- return return_headers ? headers : line_no
56
+ return headers
44
57
  elsif line_no > 100
45
58
  raise Roo::HeaderRowNotFoundError
46
59
  elsif headers.length > 0
47
60
  # partial match
48
61
  @header_line = line_no
49
- raise Roo::HeaderRowIncompleteError unless @options[:force_headers]
50
- return return_headers ? headers : line_no
62
+ raise Roo::HeaderRowIncompleteError unless partial_match
63
+ return headers
51
64
  end
65
+ line_no += 1
52
66
  end
53
67
  raise Roo::HeaderRowNotFoundError
54
68
  end
55
69
 
56
70
  def search_or_set_header(options)
71
+ force_headers = options.delete(:headers)
57
72
  if options[:header_search]
58
- @headers = nil
59
- @header_line = row_with(options[:header_search])
60
- elsif [:first_row, true].include?(options[:headers])
61
- @headers = Hash[row(first_row).map.with_index{ |x, i| [x, i + first_column] }]
73
+ row_with(options[:header_search])
74
+ elsif [:first_row, true].include?(force_headers)
62
75
  @header_line = first_row
63
76
  else
64
- set_headers(options)
77
+ return set_headers(options)
65
78
  end
79
+ return Hash[row(header_line).map { |x| [x, header_index(x)] }]
80
+ rescue Roo::HeaderRowNotFoundError => e
81
+ # Not OK unless a list of headers is supplied
82
+ raise e unless force_headers.is_a?(Array)
83
+ # Force the headers in the order they are given, but up to the last column
84
+ @header_line = first_row - 1
85
+ return Hash[force_headers.zip(first_column..last_column)].cleanup
66
86
  end
67
87
 
68
- def set_headers(hash = {})
88
+ def set_headers(hash)
69
89
  # try to find header row with all values or give an error
70
90
  # then create new hash by indexing strings and keeping integers for header array
71
- @headers = row_with(hash.values, true)
72
- @headers = Hash[hash.keys.zip(@headers.map { |x| header_index(x) })]
73
- rescue Roo::HeaderRowNotFoundError => e
74
- if @options[:force_headers]
75
- # Finding headers failed. Force the headers in the order they are given, but up to the last column
76
- @headers = {}
77
- hash.keys.each.with_index { |k, i| @headers[k] = i + first_column if i + first_column <= last_column }
78
- @header_line = first_row
79
- @header_line -= 1 unless hash.values.any? { |v| row(1).include? v } # partial match
80
- else
81
- raise e
82
- end
91
+ row_with(hash.values)
92
+ positions = Hash[row(header_line).map { |x| [x, header_index(x)] }]
93
+ Hash[positions.map { |k, v| [hash.invert[k] || k, v] }]
83
94
  end
84
95
 
85
96
  end
@@ -19,10 +19,13 @@ module Libis
19
19
  # - required: a list of headers that need to be present. The list can be an Array containing the literal header
20
20
  # values expected. Alternatively, a Hash is also allowed with alternative header names as keys and literal
21
21
  # names as values. If a :headers key is present in the Hash with a value of true or :first, whatever is on the
22
- # first row, will be used as header values, ignoring the rest of the Hash. A key of :header_search
22
+ # first row, will be used as header values, ignoring the rest of the Hash. A key of :header_search with an array
23
+ # of strings as value will search for a row that contains each of the strings in the given array. Each string is
24
+ # searched by regular expression, so strings may contain wildcards.
23
25
  # Default is empty array, meaning to use whatever is on the first row as header.
24
26
  # - optional: a list of headers that may be present, but are not required. Similar format as above. Default is
25
- # empty array
27
+ # empty array.
28
+ # - noheader: a list of headers to force upon the sheet if no headers are present.
26
29
  # - extension: :csv, :xlsx, :xlsm, :ods, :xls, :google to help the library in deciding what format the file is in.
27
30
  #
28
31
  # The following options are only applicable to CSV input files and are ignored otherwise.
@@ -31,8 +34,6 @@ module Libis
31
34
  # - col_sep: column separator. Default is ',', but can be set to "\t" for TSV files.
32
35
  # - quote_char: character for quoting.
33
36
  #
34
- # Resources are created during initialisation and should be freed by calling the #close method.
35
- #
36
37
  # @param [String] file_name
37
38
  # @param [Hash] opts
38
39
  def initialize(file_name, opts = {})
@@ -45,18 +46,19 @@ module Libis
45
46
  col_sep: ',',
46
47
  quote_char: '"',
47
48
  ),
48
- skip_headers: true,
49
- force_headers: true,
50
49
  }.merge(opts)
51
50
 
52
51
  required_headers = options.delete(:required) || []
53
52
  optional_headers = options.delete(:optional) || []
53
+ noheader_headers = options.delete(:noheader) || []
54
54
 
55
55
  file, sheet = file_name.split('|')
56
56
  @ss = ::Roo::Spreadsheet.open(file, options)
57
57
  @ss.default_sheet = sheet if sheet
58
58
 
59
- check_headers(required_headers, optional_headers)
59
+ @header_options = {}
60
+
61
+ check_headers(required: required_headers, optional: optional_headers, noheader: noheader_headers)
60
62
 
61
63
  end
62
64
 
@@ -66,6 +68,7 @@ module Libis
66
68
  # - :sheet - overwrites default sheet name
67
69
  # - :required - Array or Hash of required headers
68
70
  # - :optional - Array or Hash of optional headers
71
+ # - :noheader - Array of noheader headers
69
72
  #
70
73
  # Each iteration, a Hash will be passed with the key names as specified in the header options and the
71
74
  # corresponding cell values.
@@ -73,20 +76,39 @@ module Libis
73
76
  # @param [Hash] options
74
77
  def each(options = {}, &block)
75
78
  @ss.default_sheet = options[:sheet] if options[:sheet]
76
- @ss.each(check_headers(options[:required], options[:optional]), &block)
79
+ @ss.each(check_headers(options), &block)
77
80
  end
78
81
 
82
+ # Parse sheet content.
83
+ #
84
+ # The options Hash can contain the following keys:
85
+ # - :sheet - overwrites default sheet name
86
+ # - :required - Array or Hash of required headers
87
+ # - :optional - Array or Hash of optional headers
88
+ # - :noheader - Array of noheader headers
89
+ #
90
+ # An Array will be returned containing, for each row, a Hash with the key names as specified in the header options and the
91
+ # corresponding cell values.
92
+ #
93
+ # @param [Hash] options
94
+ # @return [Array<Hash>]
79
95
  def parse(options = {})
80
- @ss.default_sheet = options[:sheet] if options[:sheet]
81
- @ss.parse(check_headers(options[:required], options[:optional]))
96
+ @ss.default_sheet = options.delete(:sheet) if options.has_key?(:sheet)
97
+ @ss.parse(check_headers(options))
82
98
  end
83
99
 
100
+ # Return the current row and increment the current_row pointer.
84
101
  def shift
85
102
  return nil unless @current_row < @ss.last_row
86
103
  @current_row += 1
87
104
  Hash[@ss.row(@current_row).map.with_index { |v, i| [headers[i], v] }]
88
105
  end
89
106
 
107
+ # Set the current_row pointer back to the start
108
+ def restart
109
+ @current_row = @ss.header_line
110
+ end
111
+
90
112
  # Open and iterate over sheet content.
91
113
  #
92
114
  # @param @see #initialize
@@ -95,60 +117,77 @@ module Libis
95
117
  end
96
118
 
97
119
  def headers
98
- (@ss.headers || {}).keys + @extra_headers
120
+ (@ss.headers || {}).keys
99
121
  end
100
122
 
101
123
  private
102
124
 
103
- def check_headers(required_headers, optional_headers)
104
- return @header_options unless required_headers || optional_headers
105
- header_options = {}
106
- required_headers ||= []
107
- optional_headers ||= []
108
- unless required_headers.is_a?(Hash) || required_headers.is_a?(Array)
109
- raise RuntimeError, 'Required headers should be either a Hash or an Array.'
110
- end
111
- unless optional_headers.is_a?(Hash) || optional_headers.is_a?(Array)
112
- raise RuntimeError, 'Optional headers should be either a Hash or an Array.'
113
- end
114
- if required_headers.empty?
115
- if optional_headers.empty?
116
- header_options[:headers] = :first_row
117
- else
118
- header_options[:header_search] =
119
- (optional_headers.is_a?(Hash) ? optional_headers.values : optional_headers)
125
+ def check_headers(options = {})
126
+ if options[:required] || options[:optional] || options[:noheader]
127
+
128
+ # defaults
129
+ ss_options = {}
130
+ required_headers = options[:required] || []
131
+ optional_headers = options[:optional] || []
132
+
133
+ # make sure required_headers is a Hash
134
+ case required_headers
135
+ when Hash
136
+ # OK
137
+ when Array
138
+ required_headers = Hash[required_headers.zip(required_headers)]
139
+ else
140
+ raise RuntimeError, 'Required headers should be either a Hash or an Array.'
120
141
  end
121
- else
122
- header_options =
123
- required_headers.is_a?(Hash) ? required_headers : Hash[required_headers.map { |x| [x] * 2 }]
124
- header_options.merge!(
125
- optional_headers.is_a?(Hash) ? optional_headers : Hash[optional_headers.map { |x| [x] * 2 }]
126
- )
127
- end
128
142
 
129
- required_headers = required_headers.values if required_headers.is_a?(Hash)
130
-
131
- @ss.each(header_options) { break }
132
- @current_row = @ss.header_line
143
+ # make sure optional_headers is a Hash
144
+ case optional_headers
145
+ when Hash
146
+ # OK
147
+ when Array
148
+ optional_headers = Hash[optional_headers.zip(optional_headers)]
149
+ else
150
+ raise RuntimeError, 'Optional headers should be either a Hash or an Array.'
151
+ end
133
152
 
134
- # checks
135
- found_headers = required_headers & @ss.row([@current_row, 1].max)
136
- if found_headers.empty?
137
- # No headers found - check if there are enough columns to satisfy the required headers
138
- if required_headers.size > (@ss.last_column - @ss.first_column) + 1
139
- raise RuntimeError, 'Sheet does not contain enough columns.'
153
+ # make sure noheader_headers is properly initialized
154
+ noheader_headers = options[:noheader]
155
+ raise RuntimeError, 'Noheader headers should be an Array.' unless noheader_headers.is_a?(Array)
156
+
157
+ # if not set, default to both required and optional headers
158
+ noheader_headers = (required_headers.keys + optional_headers.keys) if noheader_headers.empty?
159
+
160
+ # force noheader_headers or just use first row
161
+ ss_options[:headers] = noheader_headers.empty? ? :first_row : noheader_headers
162
+
163
+ # search for whatever was supplied
164
+ ss_options.merge!(required_headers).merge!(optional_headers)
165
+
166
+ # allow partial match for only required headers
167
+ ss_options[:partial_match] = true
168
+ ss_options[:required_headers] = required_headers.keys
169
+
170
+ # force a header check (may throw exceptions)
171
+ begin
172
+ @ss.each(ss_options.dup) { break }
173
+ rescue Roo::HeaderRowNotFoundError
174
+ found_headers = required_headers.keys & @ss.headers.keys
175
+ raise RuntimeError, "Headers not found: #{required_headers.keys - found_headers}."
176
+ rescue Roo::HeaderRowIncompleteError
177
+ if @ss.row(@ss.header_line).compact.empty?
178
+ raise RuntimeError, 'Sheet does not contain enough columns.'
179
+ else
180
+ found_headers = required_headers.keys & @ss.headers.keys
181
+ raise RuntimeError, "Headers not found: #{required_headers.keys - found_headers}."
182
+
183
+ end
140
184
  end
141
- elsif found_headers.size < required_headers.size
142
- # Some, but not all headers found
143
- raise RuntimeError, "Headers not found: #{required_headers - found_headers}."
144
- else
145
- # All required headers found
146
- end
147
185
 
148
- @extra_headers = (required_headers.empty? && optional_headers.empty?) ? [] :
149
- @ss.row(@ss.header_line).keep_if { |x| x && !header_options.values.include?(x) }
186
+ @current_row = @ss.header_line
187
+ @header_options = ss_options.merge(skip_headers: true)
188
+ end
150
189
 
151
- @header_options = header_options.merge(Hash[@extra_headers.map { |v| [v] * 2 }])
190
+ @header_options.dup
152
191
  end
153
192
 
154
193
  end
@@ -1,5 +1,5 @@
1
1
  module Libis
2
2
  module Tools
3
- VERSION = '0.9.44'
3
+ VERSION = '0.9.45'
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: libis-tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.44
4
+ version: 0.9.45
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kris Dekeyser
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-22 00:00:00.000000000 Z
11
+ date: 2016-11-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler