simple_xlsx_reader 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +100 -0
- data/Rakefile +8 -0
- data/lib/simple_xlsx_reader.rb +354 -0
- data/lib/simple_xlsx_reader/version.rb +3 -0
- data/simple_xlsx_reader.gemspec +24 -0
- data/test/sesame_street_blog.xlsx +0 -0
- data/test/shared_strings.xml +80 -0
- data/test/simple_xlsx_reader_test.rb +108 -0
- data/test/test_helper.rb +7 -0
- metadata +110 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Woody Peterson
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# SimpleXlsxReader
|
2
|
+
|
3
|
+
An xlsx reader for Ruby that parses xlsx cell values into plain Ruby
|
4
|
+
primitives and dates/times.
|
5
|
+
|
6
|
+
This is *not* a rewrite of Excel in Ruby. Font styles, for
|
7
|
+
example, are parsed to determine whether a cell is a number or a date,
|
8
|
+
then forgotten. We just want to get the data, and get out!
|
9
|
+
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
### Summary:
|
13
|
+
|
14
|
+
doc = SimpleXlsxReader.open('/path/to/workbook.xlsx')
|
15
|
+
doc.sheets # => [<#SXR::Sheet>, ...]
|
16
|
+
doc.sheets.first.name # 'Sheet1'
|
17
|
+
doc.sheets.first.rows # [['Header 1', 'Header 2', ...]
|
18
|
+
['foo', 2, ...]]
|
19
|
+
|
20
|
+
That's it!
|
21
|
+
|
22
|
+
### Load Errors
|
23
|
+
|
24
|
+
By default, cell load errors (e.g. if a date cell contains the string
|
25
|
+
'hello') result in a SimpleXlsxReader::CellLoadError.
|
26
|
+
|
27
|
+
If you would like to provide better error feedback to your users, you
|
28
|
+
can set `SimpleXlsxReader.configuration.catch_cell_load_errors =
|
29
|
+
true`, and load errors will instead be inserted into Sheet#load_errors keyed
|
30
|
+
by [rownum, colnum].
|
31
|
+
|
32
|
+
### More
|
33
|
+
|
34
|
+
Here's the entire public API, in code:
|
35
|
+
|
36
|
+
module SimpleXlsxReader
|
37
|
+
def self.open(file_path)
|
38
|
+
Document.new(file_path).tap(&:sheets)
|
39
|
+
end
|
40
|
+
|
41
|
+
class Document
|
42
|
+
attr_reader :file_path
|
43
|
+
|
44
|
+
def initialize(file_path)
|
45
|
+
@file_path = file_path
|
46
|
+
end
|
47
|
+
|
48
|
+
def sheets
|
49
|
+
@sheets ||= Mapper.new(xml).load_sheets
|
50
|
+
end
|
51
|
+
|
52
|
+
def to_hash
|
53
|
+
sheets.inject({}) {|acc, sheet| acc[sheet.name] = sheet.rows; acc}
|
54
|
+
end
|
55
|
+
|
56
|
+
def xml
|
57
|
+
Xml.load(file_path)
|
58
|
+
end
|
59
|
+
|
60
|
+
class Sheet < Struct.new(:name, :rows)
|
61
|
+
def headers
|
62
|
+
rows[0]
|
63
|
+
end
|
64
|
+
|
65
|
+
def data
|
66
|
+
rows[1..-1]
|
67
|
+
end
|
68
|
+
|
69
|
+
# Load errors will be a hash of the form:
|
70
|
+
# {
|
71
|
+
# [rownum, colnum] => '[error]'
|
72
|
+
# }
|
73
|
+
def load_errors
|
74
|
+
@load_errors ||= {}
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
## Installation
|
81
|
+
|
82
|
+
Add this line to your application's Gemfile:
|
83
|
+
|
84
|
+
gem 'simple_xlsx_reader'
|
85
|
+
|
86
|
+
And then execute:
|
87
|
+
|
88
|
+
$ bundle
|
89
|
+
|
90
|
+
Or install it yourself as:
|
91
|
+
|
92
|
+
$ gem install simple_xlsx_reader
|
93
|
+
|
94
|
+
## Contributing
|
95
|
+
|
96
|
+
1. Fork it
|
97
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
98
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
99
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
100
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,354 @@
|
|
1
|
+
require "simple_xlsx_reader/version"
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'zip/zip'
|
4
|
+
require 'zip/zipfilesystem'
|
5
|
+
require 'date'
|
6
|
+
|
7
|
+
module SimpleXlsxReader
|
8
|
+
class CellLoadError < StandardError; end
|
9
|
+
|
10
|
+
# Returns the shared settings object, building it on first use.
#
# The object has a single setting, +catch_cell_load_errors+, which starts
# out as false.
def self.configuration
  return @configuration if @configuration

  settings = Struct.new(:catch_cell_load_errors).new
  settings.catch_cell_load_errors = false
  @configuration = settings
end
|
15
|
+
|
16
|
+
# Builds a Document for +file_path+, calls Document#sheets on it so the
# sheets are loaded before returning, and returns the Document.
def self.open(file_path)
  document = Document.new(file_path)
  document.sheets
  document
end
|
19
|
+
|
20
|
+
class Document
|
21
|
+
attr_reader :file_path
|
22
|
+
|
23
|
+
# Remembers the path of the xlsx file; nothing is read from disk here.
def initialize(file_path)
  @file_path = file_path
end
|
26
|
+
|
27
|
+
# The document's sheets, as returned by Mapper#load_sheets. The result is
# memoized, so the xml is only loaded and mapped once.
def sheets
  return @sheets if @sheets

  @sheets = Mapper.new(xml).load_sheets
end
|
30
|
+
|
31
|
+
# Returns a hash of {sheet name => rows} built from #sheets.
def to_hash
  sheets.each_with_object({}) do |sheet, rows_by_name|
    rows_by_name[sheet.name] = sheet.rows
  end
end
|
34
|
+
|
35
|
+
# Loads the source xml for this document's file via Xml.load. Not memoized
# here; every call reloads.
def xml
  source_path = file_path
  Xml.load(source_path)
end
|
38
|
+
|
39
|
+
# A single worksheet: its +name+ and its +rows+ (an array of row arrays).
class Sheet < Struct.new(:name, :rows)
  # The first row, or nil when the sheet has no rows (or rows were never set).
  def headers
    (rows || [])[0]
  end

  # Every row after the first. Always an array: a sheet with zero or one
  # rows yields [] (a bare rows[1..-1] would give nil for an empty sheet).
  def data
    (rows || [])[1..-1] || []
  end

  # Load errors will be a hash of the form:
  # {
  #   [rownum, colnum] => '[error]'
  # }
  def load_errors
    @load_errors ||= {}
  end
end
|
56
|
+
|
57
|
+
##
# For internal use; stores source xml in nokogiri documents
class Xml
  attr_accessor :workbook, :shared_strings, :sheets, :styles

  # Opens the zip archive at +file_path+ and returns a new Xml holding the
  # parsed workbook, styles, optional shared strings, and worksheets.
  def self.load(file_path)
    loaded = new

    Zip::ZipFile.open(file_path) do |zip|
      loaded.workbook = Nokogiri::XML(zip.read('xl/workbook.xml'))
      loaded.styles   = Nokogiri::XML(zip.read('xl/styles.xml'))

      # optional feature used by excel, but not often used by xlsx
      # generation libraries
      if zip.file.file?('xl/sharedStrings.xml')
        loaded.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml'))
      end

      # Worksheets are read as sheet1.xml, sheet2.xml, ... and reading stops
      # at the first number that has no file.
      loaded.sheets = []
      sheet_number = 1
      while zip.file.file?("xl/worksheets/sheet#{sheet_number}.xml")
        loaded.sheets <<
          Nokogiri::XML(zip.read("xl/worksheets/sheet#{sheet_number}.xml"))
        sheet_number += 1
      end
    end

    loaded
  end
end
|
87
|
+
|
88
|
+
##
|
89
|
+
# For internal use; translates source xml to Sheet objects.
|
90
|
+
class Mapper < Struct.new(:xml)
|
91
|
+
# Parses every sheet listed in #sheet_toc, in that order, handing
# #parse_sheet the sheet's name and the xml document at its index.
def load_sheets
  sheet_toc.each_pair.map do |title, position|
    parse_sheet(title, xml.sheets[position])
  end
end
|
96
|
+
|
97
|
+
# Table of contents for the sheets, ex. {'Authors' => 0, ...}
#
# Each <sheet> element of the workbook contributes its name, mapped to its
# sheetId minus one so the value can index into xml.sheets.
# NOTE(review): this presumes sheetId lines up with the sheetN.xml file
# numbering used when the sheets were loaded -- confirm for workbooks whose
# sheets were deleted or reordered.
def sheet_toc
  sheet_nodes = xml.workbook.xpath('/xmlns:workbook/xmlns:sheets/xmlns:sheet')

  sheet_nodes.each_with_object({}) do |node, toc|
    title = node.attributes['name'].value
    toc[title] = node.attributes['sheetId'].value.to_i - 1 # keep things 0-indexed
  end
end
|
108
|
+
|
109
|
+
# Builds a Sheet named +sheet_name+ from the worksheet document +xsheet+.
#
# Every <c> cell of every <row> is cast with ::cast. The cell's type comes
# from its 't' attribute when present, otherwise from the style its 's'
# attribute indexes in #style_types.
#
# When a cast fails, a CellLoadError naming the row and column is raised,
# unless SimpleXlsxReader.configuration.catch_cell_load_errors is set; in
# that case the message is recorded in Sheet#load_errors under
# [rownum, colnum] and the cell's raw text is used as its value.
def parse_sheet(sheet_name, xsheet)
  sheet = Sheet.new(sheet_name)
  xrows = xsheet.xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row")

  # Row and column numbers are 0-based positions among the elements found.
  sheet.rows = xrows.each_with_index.map do |xrow, rownum|
    xrow.xpath('xmlns:c').each_with_index.map do |xcell, colnum|
      explicit_type = xcell.attributes['t']
      type = explicit_type && explicit_type.value
      # If not the above, attempt to determine from a custom style
      type ||= xcell.attributes['s'] &&
        style_types[xcell.attributes['s'].value.to_i]

      begin
        self.class.cast(xcell.text, type, shared_strings: shared_strings)
      rescue => e
        if SimpleXlsxReader.configuration.catch_cell_load_errors
          sheet.load_errors[[rownum, colnum]] = e.message

          xcell.text
        else
          error = CellLoadError.new(
            "Row #{rownum}, Col #{colnum}: #{e.message}")
          error.set_backtrace(e.backtrace)
          raise error
        end
      end
    end
  end

  sheet
end
|
146
|
+
|
147
|
+
# Excel doesn't record types for some cells, only its display style, so
# we have to back out the type from that style.
#
# Some of these styles can be determined from a known set (see NumFmtMap),
# while others are 'custom' and we have to make a best guess.
#
# This is the array of types corresponding to the styles a spreadsheet
# uses, and includes both the known style types and the custom styles.
#
# Note that the xml sheet cells that use this don't reference the
# numFmtId, but instead the array index of a style in the stored list of
# only the styles used in the spreadsheet (which can be either known or
# custom). Hence this style types array, rather than a map of numFmtId to
# type.
#
# An <xf> without a numFmtId attribute yields nil for its slot rather than
# raising, so the array stays aligned with the cellXfs indices.
def style_types
  @style_types ||=
    xml.styles.xpath('/xmlns:styleSheet/xmlns:cellXfs/xmlns:xf').map do |xstyle|
      num_fmt_id = xstyle.attributes['numFmtId']
      style_type_by_num_fmt_id(num_fmt_id && num_fmt_id.value)
    end
end
|
166
|
+
|
167
|
+
# Finds the type we think a style is; For example, fmtId 14 is a date
# style, so this would return :date
#
# +id+ may be a string or an integer; nil returns nil. Ids from 164 upward
# are looked up in #custom_style_types (164 is the first id used for custom
# formats); anything lower is looked up in NumFmtMap. Unknown ids return
# nil.
def style_type_by_num_fmt_id(id)
  return nil if id.nil?

  id = id.to_i
  if id >= 164 # custom style, arg!
    custom_style_types[id]
  else # we should know this one
    NumFmtMap[id]
  end
end
|
179
|
+
|
180
|
+
# Map of the numFmtIds declared under the stylesheet's <numFmts> (the
# workbook's custom formats) to our best guess at the type
# ex. {165 => :date_time}
#
# The guess for each entry comes from #determine_custom_style_type applied
# to its formatCode. The map is built once and memoized.
def custom_style_types
  return @custom_style_types if @custom_style_types

  format_nodes = xml.styles.xpath('/xmlns:styleSheet/xmlns:numFmts/xmlns:numFmt')

  @custom_style_types =
    format_nodes.each_with_object({}) do |xstyle, guesses|
      format_id = xstyle.attributes['numFmtId'].value.to_i
      guesses[format_id] =
        determine_custom_style_type(xstyle.attributes['formatCode'].value)
    end
end
|
193
|
+
|
194
|
+
# This is the least deterministic part of reading xlsx files. Due to
# custom styles, you can't know for sure when a date is a date other than
# looking at its format and guessing. It's not impossible to guess right,
# though.
#
# http://stackoverflow.com/questions/4948998/determining-if-an-xlsx-cell-is-date-formatted-for-excel-2007-spreadsheets
#
# +string+ is a custom format code. Returns :float for codes starting with
# '_' or with ' 0', :date_time for codes containing a date/time letter
# outside of bracketed sections, and :unsupported otherwise.
def determine_custom_style_type(string)
  # The second prefix is two characters long, so it has to be checked as a
  # prefix; comparing it against the single character string[0] never matched.
  return :float if string.start_with?('_', ' 0')

  # Looks for one of ymdhis outside of meta-stuff like [Red]
  return :date_time if string =~ /(^|\])[^\[]*[ymdhis]/i

  :unsupported
end
|
209
|
+
|
210
|
+
##
# The heart of typecasting. The ruby type is determined either explicitly
# from the cell xml or implicitly from the cell style, and this
# method expects that work to have been done already. This, then,
# takes the type we determined it to be and casts the cell value
# to that type.
#
# types:
# - s: shared string (see #shared_string)
# - n: number (cast to a float)
# - b: boolean
# - str: string
# - inlineStr: string
# - ruby symbol: for when type has been determined by style
#
# options:
# - shared_strings: needed for 's' (shared string) type
#
# Returns nil for a nil or empty +value+, and +value+ unchanged for any
# type not listed below. Date-like types raise ArgumentError (from
# Integer()) when the value is not numeric.
def self.cast(value, type, options = {})
  return nil if value.nil? || value.empty?

  case type

  ##
  # There are few built-in types
  ##

  when 's' # shared string
    options[:shared_strings][value.to_i]
  when 'n' # number
    value.to_f
  when 'b'
    value.to_i == 1
  when 'str', 'inlineStr'
    value

  ##
  # Type can also be determined by a style,
  # detected earlier and cast here by its standardized symbol
  ##

  when :string, :unsupported
    value
  when :fixnum
    value.to_i
  when :float
    value.to_f
  when :percentage
    # NOTE(review): this divides the stored number by 100 -- confirm that
    # percentage cells are not already stored as fractions.
    value.to_f / 100
  # the trickiest. note that all these formats can vary on
  # whether they actually contain a date, time, or datetime.
  when :date, :time, :date_time
    days_since_1900, fraction_of_24 = value.split('.')

    # http://stackoverflow.com/questions/10559767/how-to-convert-ms-excel-date-from-float-to-date-format-in-ruby
    date = Date.new(1899, 12, 30) + Integer(days_since_1900)

    return date unless fraction_of_24 # no time associated

    # Round to the nearest second. Truncating hours and minutes separately
    # dropped the seconds and let float error turn 11:00 into 10:59.
    seconds = ("0.#{fraction_of_24}".to_f * 86_400).round
    Time.utc(date.year, date.month, date.day) + seconds
  when :bignum
    if defined?(BigDecimal)
      # Kernel#BigDecimal; BigDecimal.new no longer exists in current versions.
      BigDecimal(value)
    else
      value.to_f
    end

  ##
  # Beats me
  ##

  else
    value
  end
end
|
293
|
+
|
294
|
+
# Map of non-custom numFmtId to casting symbol. The trailing comment on each
# entry is the format code that id stands for. Frozen so the shared lookup
# table cannot be modified at runtime.
NumFmtMap = {
  0  => :string,      # General
  1  => :fixnum,      # 0
  2  => :float,       # 0.00
  3  => :fixnum,      # #,##0
  4  => :float,       # #,##0.00
  5  => :unsupported, # $#,##0_);($#,##0)
  6  => :unsupported, # $#,##0_);[Red]($#,##0)
  7  => :unsupported, # $#,##0.00_);($#,##0.00)
  8  => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
  9  => :percentage,  # 0%
  10 => :percentage,  # 0.00%
  11 => :bignum,      # 0.00E+00
  12 => :unsupported, # # ?/?
  13 => :unsupported, # # ??/??
  14 => :date,        # mm-dd-yy
  15 => :date,        # d-mmm-yy
  16 => :date,        # d-mmm
  17 => :date,        # mmm-yy
  18 => :time,        # h:mm AM/PM
  19 => :time,        # h:mm:ss AM/PM
  20 => :time,        # h:mm
  21 => :time,        # h:mm:ss
  22 => :date_time,   # m/d/yy h:mm
  37 => :unsupported, # #,##0 ;(#,##0)
  38 => :unsupported, # #,##0 ;[Red](#,##0)
  39 => :unsupported, # #,##0.00;(#,##0.00)
  40 => :unsupported, # #,##0.00;[Red](#,##0.00)
  45 => :time,        # mm:ss
  46 => :time,        # [h]:mm:ss
  47 => :time,        # mmss.0
  48 => :bignum,      # ##0.0E+0
  49 => :unsupported  # @
}.freeze
|
329
|
+
|
330
|
+
# For performance reasons, excel uses an optional SpreadsheetML feature
# that puts all strings in a separate xml file, and then references
# them by their index in that file.
#
# http://msdn.microsoft.com/en-us/library/office/gg278314.aspx
#
# Returns the strings in document order, or [] when the workbook has no
# shared strings document. The result is memoized.
def shared_strings
  return @shared_strings if @shared_strings

  @shared_strings =
    if xml.shared_strings
      xml.shared_strings.xpath('/xmlns:sst/xmlns:si').map do |xsst|
        # a shared string can be a single value...
        single = xsst.xpath('xmlns:t/text()').first
        # ... or a composite of separately styled words/characters
        (single && single.text) ||
          xsst.xpath('xmlns:r/xmlns:t/text()').map(&:text).join
      end
    else
      []
    end
end
|
350
|
+
|
351
|
+
end
|
352
|
+
|
353
|
+
end
|
354
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for simple_xlsx_reader.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'simple_xlsx_reader/version'

Gem::Specification.new do |gem|
  gem.name          = "simple_xlsx_reader"
  gem.version       = SimpleXlsxReader::VERSION
  gem.authors       = ["Woody Peterson"]
  gem.email         = ["woody@sigby.com"]
  gem.description   = %q{Read xlsx data the Ruby way}
  gem.summary       = %q{Read xlsx data the Ruby way}
  gem.homepage      = ""
  # Matches the MIT License text shipped in LICENSE.txt.
  gem.license       = "MIT"

  gem.add_dependency 'nokogiri'
  # The library requires 'zip/zip' and uses Zip::ZipFile, which is the
  # pre-1.0 rubyzip API, so later major versions must be excluded.
  gem.add_dependency 'rubyzip', '< 1.0'

  gem.add_development_dependency 'pry'

  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
end
|
Binary file
|
@@ -0,0 +1,80 @@
|
|
1
|
+
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="6" uniqueCount="5">
|
2
|
+
<si>
|
3
|
+
<t>Cell A1</t>
|
4
|
+
</si>
|
5
|
+
<si>
|
6
|
+
<t>Cell B1</t>
|
7
|
+
</si>
|
8
|
+
<si>
|
9
|
+
<t>My Cell</t>
|
10
|
+
</si>
|
11
|
+
<si>
|
12
|
+
<r>
|
13
|
+
<rPr>
|
14
|
+
<sz val="11"/>
|
15
|
+
<color rgb="FFFF0000"/>
|
16
|
+
<rFont val="Calibri"/>
|
17
|
+
<family val="2"/>
|
18
|
+
<scheme val="minor"/>
|
19
|
+
</rPr>
|
20
|
+
<t>Cell</t>
|
21
|
+
</r>
|
22
|
+
<r>
|
23
|
+
<rPr>
|
24
|
+
<sz val="11"/>
|
25
|
+
<color theme="1"/>
|
26
|
+
<rFont val="Calibri"/>
|
27
|
+
<family val="2"/>
|
28
|
+
<scheme val="minor"/>
|
29
|
+
</rPr>
|
30
|
+
<t xml:space="preserve"> </t>
|
31
|
+
</r>
|
32
|
+
<r>
|
33
|
+
<rPr>
|
34
|
+
<b/>
|
35
|
+
<sz val="11"/>
|
36
|
+
<color theme="1"/>
|
37
|
+
<rFont val="Calibri"/>
|
38
|
+
<family val="2"/>
|
39
|
+
<scheme val="minor"/>
|
40
|
+
</rPr>
|
41
|
+
<t>A2</t>
|
42
|
+
</r>
|
43
|
+
</si>
|
44
|
+
<si>
|
45
|
+
<r>
|
46
|
+
<rPr>
|
47
|
+
<sz val="11"/>
|
48
|
+
<color rgb="FF00B0F0"/>
|
49
|
+
<rFont val="Calibri"/>
|
50
|
+
<family val="2"/>
|
51
|
+
<scheme val="minor"/>
|
52
|
+
</rPr>
|
53
|
+
<t>Cell</t>
|
54
|
+
</r>
|
55
|
+
<r>
|
56
|
+
<rPr>
|
57
|
+
<sz val="11"/>
|
58
|
+
<color theme="1"/>
|
59
|
+
<rFont val="Calibri"/>
|
60
|
+
<family val="2"/>
|
61
|
+
<scheme val="minor"/>
|
62
|
+
</rPr>
|
63
|
+
<t xml:space="preserve"> </t>
|
64
|
+
</r>
|
65
|
+
<r>
|
66
|
+
<rPr>
|
67
|
+
<i/>
|
68
|
+
<sz val="11"/>
|
69
|
+
<color theme="1"/>
|
70
|
+
<rFont val="Calibri"/>
|
71
|
+
<family val="2"/>
|
72
|
+
<scheme val="minor"/>
|
73
|
+
</rPr>
|
74
|
+
<t>B2</t>
|
75
|
+
</r>
|
76
|
+
</si>
|
77
|
+
<si>
|
78
|
+
<t>Cell Fmt</t>
|
79
|
+
</si>
|
80
|
+
</sst>
|
@@ -0,0 +1,108 @@
|
|
1
|
+
require_relative 'test_helper'
|
2
|
+
require 'time'
|
3
|
+
|
4
|
+
describe SimpleXlsxReader do
|
5
|
+
let(:sesame_street_blog_file) { File.join(File.dirname(__FILE__),
|
6
|
+
'sesame_street_blog.xlsx') }
|
7
|
+
|
8
|
+
let(:subject) { SimpleXlsxReader::Document.new(sesame_street_blog_file) }
|
9
|
+
|
10
|
+
# Reads the bundled sesame_street_blog.xlsx fixture end to end.
describe '#to_hash' do
  it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
    authors = [
      ["Name", "Occupation"],
      ["Big Bird", "Teacher"]
    ]

    posts = [
      ["Author Name", "Title", "Body", "Created At", "Comment Count"],
      ["Big Bird", "The Number 1", "The Greatest", Time.parse("2002-01-01 11:00:00 UTC"), 1],
      ["Big Bird", "The Number 2", "Second Best", Time.parse("2002-01-02 14:00:00 UTC"), 2]
    ]

    subject.to_hash.must_equal({ "Authors" => authors, "Posts" => posts })
  end
end
|
24
|
+
|
25
|
+
describe SimpleXlsxReader::Document::Mapper do
|
26
|
+
let(:described_class) { SimpleXlsxReader::Document::Mapper }
|
27
|
+
|
28
|
+
# Exercises Mapper.cast directly, without loading a workbook.
describe '::cast' do
  it 'reads type s as a shared string' do
    strings = ['a', 'b', 'c']
    result  = described_class.cast('1', 's', shared_strings: strings)

    result.must_equal 'b'
  end

  it 'reads type inlineStr as a string' do
    cell = Nokogiri::XML(%( <c t="inlineStr"><is><t>the value</t></is></c> ))

    described_class.cast(cell.text, 'inlineStr').must_equal 'the value'
  end
end
|
39
|
+
|
40
|
+
# Parses the shared_strings.xml fixture that sits next to this file.
describe '#shared_strings' do
  let(:xml) do
    fixture_path = File.join(File.dirname(__FILE__), 'shared_strings.xml')

    document = SimpleXlsxReader::Document::Xml.new
    document.shared_strings = Nokogiri::XML(File.read(fixture_path))
    document
  end

  subject { described_class.new(xml) }

  it 'parses strings formatted at the cell level' do
    plain = subject.shared_strings[0, 3]
    plain.must_equal ['Cell A1', 'Cell B1', 'My Cell']
  end

  it 'parses strings formatted at the character level' do
    composite = subject.shared_strings[3, 3]
    composite.must_equal ['Cell A2', 'Cell B2', 'Cell Fmt']
  end
end
|
58
|
+
|
59
|
+
# Covers both behaviours of SimpleXlsxReader.configuration.catch_cell_load_errors
# for a cell whose text cannot be cast to the type its style implies.
describe "parse errors" do
  after do
    # restore the default so other examples are unaffected
    SimpleXlsxReader.configuration.catch_cell_load_errors = false
  end

  let(:xml) do
    SimpleXlsxReader::Document::Xml.new.tap do |xml|
      xml.sheets = [Nokogiri::XML(
        <<-XML
          <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
            <sheetData>
              <row>
                <c s='0'>
                  <v>14 is a date style; this is not a date</v>
                </c>
              </row>
            </sheetData>
          </worksheet>
        XML
      )]

      # s='0' above refers to the value of numFmtId at cellXfs index 0
      xml.styles = Nokogiri::XML(
        <<-XML
          <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
            <cellXfs count="1">
              <xf numFmtId="14" />
            </cellXfs>
          </styleSheet>
        XML
      )
    end
  end

  it 'raises a CellLoadError if configuration.catch_cell_load_errors is false' do
    SimpleXlsxReader.configuration.catch_cell_load_errors = false

    lambda { described_class.new(xml).parse_sheet('test', xml.sheets.first) }.
      must_raise(SimpleXlsxReader::CellLoadError)
  end

  it 'records a load error if configuration.catch_cell_load_errors is true' do
    SimpleXlsxReader.configuration.catch_cell_load_errors = true

    sheet = described_class.new(xml).parse_sheet('test', xml.sheets.first)
    sheet.load_errors[[0,0]].must_include 'invalid value for Integer'
  end
end
|
107
|
+
end
|
108
|
+
end
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: simple_xlsx_reader
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Woody Peterson
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-01-16 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rubyzip
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: pry
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
description: Read xlsx data the Ruby way
|
63
|
+
email:
|
64
|
+
- woody@sigby.com
|
65
|
+
executables: []
|
66
|
+
extensions: []
|
67
|
+
extra_rdoc_files: []
|
68
|
+
files:
|
69
|
+
- .gitignore
|
70
|
+
- Gemfile
|
71
|
+
- LICENSE.txt
|
72
|
+
- README.md
|
73
|
+
- Rakefile
|
74
|
+
- lib/simple_xlsx_reader.rb
|
75
|
+
- lib/simple_xlsx_reader/version.rb
|
76
|
+
- simple_xlsx_reader.gemspec
|
77
|
+
- test/sesame_street_blog.xlsx
|
78
|
+
- test/shared_strings.xml
|
79
|
+
- test/simple_xlsx_reader_test.rb
|
80
|
+
- test/test_helper.rb
|
81
|
+
homepage: ''
|
82
|
+
licenses: []
|
83
|
+
post_install_message:
|
84
|
+
rdoc_options: []
|
85
|
+
require_paths:
|
86
|
+
- lib
|
87
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
88
|
+
none: false
|
89
|
+
requirements:
|
90
|
+
- - ! '>='
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
requirements: []
|
100
|
+
rubyforge_project:
|
101
|
+
rubygems_version: 1.8.24
|
102
|
+
signing_key:
|
103
|
+
specification_version: 3
|
104
|
+
summary: Read xlsx data the Ruby way
|
105
|
+
test_files:
|
106
|
+
- test/sesame_street_blog.xlsx
|
107
|
+
- test/shared_strings.xml
|
108
|
+
- test/simple_xlsx_reader_test.rb
|
109
|
+
- test/test_helper.rb
|
110
|
+
has_rdoc:
|