simple_xlsx_reader 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README.md +100 -0
- data/Rakefile +8 -0
- data/lib/simple_xlsx_reader.rb +354 -0
- data/lib/simple_xlsx_reader/version.rb +3 -0
- data/simple_xlsx_reader.gemspec +24 -0
- data/test/sesame_street_blog.xlsx +0 -0
- data/test/shared_strings.xml +80 -0
- data/test/simple_xlsx_reader_test.rb +108 -0
- data/test/test_helper.rb +7 -0
- metadata +110 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 Woody Peterson
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
# SimpleXlsxReader
|
2
|
+
|
3
|
+
An xlsx reader for Ruby that parses xlsx cell values into plain ruby
|
4
|
+
primitives and dates/times.
|
5
|
+
|
6
|
+
This is *not* a rewrite of excel in Ruby. Font styles, for
|
7
|
+
example, are parsed to determine whether a cell is a number or a date,
|
8
|
+
then forgotten. We just want to get the data, and get out!
|
9
|
+
|
10
|
+
## Usage
|
11
|
+
|
12
|
+
### Summary:
|
13
|
+
|
14
|
+
doc = SimpleXlsxReader.open('/path/to/workbook.xlsx')
|
15
|
+
doc.sheets # => [<#SXR::Sheet>, ...]
|
16
|
+
doc.sheets.first.name # 'Sheet1'
|
17
|
+
doc.sheets.first.rows # [['Header 1', 'Header 2', ...]
|
18
|
+
['foo', 2, ...]]
|
19
|
+
|
20
|
+
That's it!
|
21
|
+
|
22
|
+
### Load Errors
|
23
|
+
|
24
|
+
By default, cell load errors (ex. if a date cell contains the string
|
25
|
+
'hello') result in a SimpleXlsxReader::CellLoadError.
|
26
|
+
|
27
|
+
If you would like to provide better error feedback to your users, you
|
28
|
+
can set `SimpleXlsxReader.configuration.catch_cell_load_errors =
|
29
|
+
true`, and load errors will instead be inserted into Sheet#load_errors keyed
|
30
|
+
by [rownum, colnum].
|
31
|
+
|
32
|
+
### More
|
33
|
+
|
34
|
+
Here's the totality of the public api, in code:
|
35
|
+
|
36
|
+
module SimpleXlsxReader
|
37
|
+
def self.open(file_path)
|
38
|
+
Document.new(file_path).tap(&:sheets)
|
39
|
+
end
|
40
|
+
|
41
|
+
class Document
|
42
|
+
attr_reader :file_path
|
43
|
+
|
44
|
+
def initialize(file_path)
|
45
|
+
@file_path = file_path
|
46
|
+
end
|
47
|
+
|
48
|
+
def sheets
|
49
|
+
@sheets ||= Mapper.new(xml).load_sheets
|
50
|
+
end
|
51
|
+
|
52
|
+
def to_hash
|
53
|
+
sheets.inject({}) {|acc, sheet| acc[sheet.name] = sheet.rows; acc}
|
54
|
+
end
|
55
|
+
|
56
|
+
def xml
|
57
|
+
Xml.load(file_path)
|
58
|
+
end
|
59
|
+
|
60
|
+
class Sheet < Struct.new(:name, :rows)
|
61
|
+
def headers
|
62
|
+
rows[0]
|
63
|
+
end
|
64
|
+
|
65
|
+
def data
|
66
|
+
rows[1..-1]
|
67
|
+
end
|
68
|
+
|
69
|
+
# Load errors will be a hash of the form:
|
70
|
+
# {
|
71
|
+
# [rownum, colnum] => '[error]'
|
72
|
+
# }
|
73
|
+
def load_errors
|
74
|
+
@load_errors ||= {}
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
## Installation
|
81
|
+
|
82
|
+
Add this line to your application's Gemfile:
|
83
|
+
|
84
|
+
gem 'simple_xlsx_reader'
|
85
|
+
|
86
|
+
And then execute:
|
87
|
+
|
88
|
+
$ bundle
|
89
|
+
|
90
|
+
Or install it yourself as:
|
91
|
+
|
92
|
+
$ gem install simple_xlsx_reader
|
93
|
+
|
94
|
+
## Contributing
|
95
|
+
|
96
|
+
1. Fork it
|
97
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
98
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
99
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
100
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,354 @@
|
|
1
|
+
require "simple_xlsx_reader/version"
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'zip/zip'
|
4
|
+
require 'zip/zipfilesystem'
|
5
|
+
require 'date'
|
6
|
+
|
7
|
+
module SimpleXlsxReader
|
8
|
+
class CellLoadError < StandardError; end
|
9
|
+
|
10
|
+
# Library-wide settings object, created on first access and reused afterwards.
#
# The only setting is +catch_cell_load_errors+, which starts out false.
def self.configuration
  @configuration ||= Struct.new(:catch_cell_load_errors).new(false)
end
|
15
|
+
|
16
|
+
# Builds a Document for +file_path+ and calls #sheets on it right away
# before returning the document.
def self.open(file_path)
  document = Document.new(file_path)
  document.sheets
  document
end
|
19
|
+
|
20
|
+
class Document
|
21
|
+
attr_reader :file_path
|
22
|
+
|
23
|
+
def initialize(file_path)
|
24
|
+
@file_path = file_path
|
25
|
+
end
|
26
|
+
|
27
|
+
def sheets
|
28
|
+
@sheets ||= Mapper.new(xml).load_sheets
|
29
|
+
end
|
30
|
+
|
31
|
+
# Returns a hash of {sheet name => sheet rows} covering every sheet.
def to_hash
  sheets.each_with_object({}) do |sheet, rows_by_name|
    rows_by_name[sheet.name] = sheet.rows
  end
end
|
34
|
+
|
35
|
+
def xml
|
36
|
+
Xml.load(file_path)
|
37
|
+
end
|
38
|
+
|
39
|
+
# A single worksheet: its name plus its rows (an array of row arrays).
class Sheet < Struct.new(:name, :rows)
  # The first row.
  def headers
    rows.first
  end

  # Every row after the first.
  def data
    rows[1..-1]
  end

  # Hash of cell load errors, created empty on first access:
  #   { [rownum, colnum] => '[error]' }
  def load_errors
    @load_errors ||= {}
  end
end
|
56
|
+
|
57
|
+
##
|
58
|
+
# For internal use; stores source xml in nokogiri documents
|
59
|
+
class Xml
  attr_accessor :workbook, :shared_strings, :sheets, :styles

  # Opens the xlsx archive at +file_path+ and parses the workbook, the
  # styles, the shared strings (when present) and each consecutively
  # numbered worksheet into Nokogiri documents.
  #
  # Returns the populated Xml instance.
  def self.load(file_path)
    xml = new

    Zip::ZipFile.open(file_path) do |zip|
      xml.workbook = Nokogiri::XML(zip.read('xl/workbook.xml'))
      xml.styles = Nokogiri::XML(zip.read('xl/styles.xml'))

      # Shared strings are an optional feature: excel uses them, but xlsx
      # generation libraries often do not.
      if zip.file.file?('xl/sharedStrings.xml')
        xml.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml'))
      end

      # Read sheet1.xml, sheet2.xml, ... and stop at the first missing number.
      xml.sheets = []
      number = 1
      while zip.file.file?("xl/worksheets/sheet#{number}.xml")
        xml.sheets << Nokogiri::XML(zip.read("xl/worksheets/sheet#{number}.xml"))
        number += 1
      end
    end

    xml
  end
end
|
87
|
+
|
88
|
+
##
|
89
|
+
# For internal use; translates source xml to Sheet objects.
|
90
|
+
class Mapper < Struct.new(:xml)
|
91
|
+
# Parses every sheet listed in the table of contents and returns the
# results in table-of-contents order.
def load_sheets
  sheet_toc.map do |sheet_name, sheet_number|
    xsheet = xml.sheets[sheet_number]
    parse_sheet(sheet_name, xsheet)
  end
end
|
96
|
+
|
97
|
+
# Table of contents for the sheets, ex. {'Authors' => 0, ...}
|
98
|
+
# Maps each sheet name in the workbook xml to a 0-based number derived
# from its sheetId attribute (sheetId - 1).
# NOTE(review): callers index xml.sheets with this number, which presumes
# sheetId N lines up with the N-th loaded worksheet file -- confirm.
def sheet_toc
  sheet_nodes = xml.workbook.xpath('/xmlns:workbook/xmlns:sheets/xmlns:sheet')

  sheet_nodes.each_with_object({}) do |node, toc|
    attrs = node.attributes
    toc[attrs['name'].value] = attrs['sheetId'].value.to_i - 1 # keep things 0-indexed
  end
end
|
108
|
+
|
109
|
+
# Builds a Sheet named +sheet_name+ from the worksheet document +xsheet+.
#
# Each cell is cast according to its explicit 't' attribute, or failing
# that, the type derived from the style its 's' attribute points at.
#
# When a cast raises, the outcome depends on
# SimpleXlsxReader.configuration.catch_cell_load_errors:
# - false: a CellLoadError naming the row and column is raised, carrying
#   the original backtrace;
# - true: the message is stored in sheet.load_errors[[rownum, colnum]]
#   and the cell's raw text is used as its value.
#
# Row and column numbers are 0-based positions within the xml.
def parse_sheet(sheet_name, xsheet)
  sheet = Sheet.new(sheet_name)
  xrows = xsheet.xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row")

  sheet.rows = xrows.each_with_index.map do |xrow, rownum|
    xrow.xpath('xmlns:c').each_with_index.map do |xcell, colnum|
      t_attr = xcell.attributes['t']
      s_attr = xcell.attributes['s']

      type = t_attr && t_attr.value
      # If not the above, attempt to determine from a custom style
      type ||= s_attr && style_types[s_attr.value.to_i]

      begin
        self.class.cast(xcell.text, type, shared_strings: shared_strings)
      rescue => e
        if SimpleXlsxReader.configuration.catch_cell_load_errors
          sheet.load_errors[[rownum, colnum]] = e.message

          xcell.text
        else
          error = CellLoadError.new(
            "Row #{rownum}, Col #{colnum}: #{e.message}")
          error.set_backtrace(e.backtrace)
          raise error
        end
      end
    end
  end

  sheet
end
|
146
|
+
|
147
|
+
# Excel doesn't record types for some cells, only its display style, so
|
148
|
+
# we have to back out the type from that style.
|
149
|
+
#
|
150
|
+
# Some of these styles can be determined from a known set (see NumFmtMap),
|
151
|
+
# while others are 'custom' and we have to make a best guess.
|
152
|
+
#
|
153
|
+
# This is the array of types corresponding to the styles a spreadsheet
|
154
|
+
# uses, and includes both the known style types and the custom styles.
|
155
|
+
#
|
156
|
+
# Note that the xml sheet cells that use this don't reference the
|
157
|
+
# numFmtId, but instead the array index of a style in the stored list of
|
158
|
+
# only the styles used in the spreadsheet (which can be either known or
|
159
|
+
# custom). Hence this style types array, rather than a map of numFmtId to
|
160
|
+
# type.
|
161
|
+
# Memoized array of cast types, one per <xf> entry under cellXfs in the
# styles xml, in document order. A cell's 's' attribute is an index into
# this array.
#
# An <xf> without a numFmtId attribute yields nil (no style-derived type)
# rather than raising, so the nil guard in style_type_by_num_fmt_id is
# actually reachable.
def style_types
  @style_types ||= begin
    cell_xfs = xml.styles.xpath('/xmlns:styleSheet/xmlns:cellXfs/xmlns:xf')

    cell_xfs.map do |xstyle|
      num_fmt_id = xstyle.attributes['numFmtId']
      style_type_by_num_fmt_id(num_fmt_id && num_fmt_id.value)
    end
  end
end
|
166
|
+
|
167
|
+
# Finds the type we think a style is; For example, fmtId 14 is a date
|
168
|
+
# style, so this would return :date
|
169
|
+
# Resolves a numFmtId (string or integer, may be nil) to a cast type.
#
# Returns nil for a nil id. Ids of 164 and above are custom formats and
# are looked up in custom_style_types; lower ids are looked up in the
# built-in NumFmtMap. Either lookup may return nil for an unknown id.
def style_type_by_num_fmt_id(id)
  return nil if id.nil?

  id = id.to_i
  if id >= 164 # custom style, arg! (custom ids start at 164)
    custom_style_types[id]
  else # we should know this one
    NumFmtMap[id]
  end
end
|
179
|
+
|
180
|
+
# Map of (numFmtId >= 164) (custom styles) to our best guess at the type
|
181
|
+
# ex. {165 => :date_time}
|
182
|
+
# Memoized hash of {numFmtId => guessed type} for every numFmt element
# declared in the styles xml, ex. {165 => :date_time}. The guess comes
# from the element's formatCode via determine_custom_style_type.
def custom_style_types
  @custom_style_types ||= begin
    num_fmts = xml.styles.xpath('/xmlns:styleSheet/xmlns:numFmts/xmlns:numFmt')

    num_fmts.each_with_object({}) do |num_fmt, types|
      attrs = num_fmt.attributes
      types[attrs['numFmtId'].value.to_i] =
        determine_custom_style_type(attrs['formatCode'].value)
    end
  end
end
|
193
|
+
|
194
|
+
# This is the least deterministic part of reading xlsx files. Due to
|
195
|
+
# custom styles, you can't know for sure when a date is a date other than
|
196
|
+
# looking at its format and guessing. It's not impossible to guess right,
|
197
|
+
# though.
|
198
|
+
#
|
199
|
+
# http://stackoverflow.com/questions/4948998/determining-if-an-xlsx-cell-is-date-formatted-for-excel-2007-spreadsheets
|
200
|
+
# Best-guess type for a custom number format code.
#
# Returns:
# - :float       when the code starts with '_' or with ' 0'
# - :date_time   when one of the letters y, m, d, h, i, s appears outside
#                bracketed meta-stuff like [Red]
# - :unsupported otherwise
#
# The ' 0' prefix used to be compared against string[0], a single
# character, so that branch could never match; the whole prefix is
# checked now.
def determine_custom_style_type(string)
  return :float if string.start_with?('_', ' 0')

  # Looks for one of ymdhis outside of meta-stuff like [Red]
  return :date_time if string =~ /(^|\])[^\[]*[ymdhis]/i

  :unsupported
end
|
209
|
+
|
210
|
+
##
|
211
|
+
# The heart of typecasting. The ruby type is determined either explicitly
|
212
|
+
# from the cell xml or implicitly from the cell style, and this
|
213
|
+
# method expects that work to have been done already. This, then,
|
214
|
+
# takes the type we determined it to be and casts the cell value
|
215
|
+
# to that type.
|
216
|
+
#
|
217
|
+
# types:
|
218
|
+
# - s: shared string (see #shared_string)
|
219
|
+
# - n: number (cast to a float)
|
220
|
+
# - b: boolean
|
221
|
+
# - str: string
|
222
|
+
# - inlineStr: string
|
223
|
+
# - ruby symbol: for when type has been determined by style
|
224
|
+
#
|
225
|
+
# options:
|
226
|
+
# - shared_strings: needed for 's' (shared string) type
|
227
|
+
# Casts the raw cell text +value+ to a ruby object according to +type+.
#
# value   - String cell text; nil or '' always casts to nil.
# type    - either the cell's explicit xml type ('s', 'n', 'b', 'str',
#           'inlineStr') or a symbol derived from the cell style
#           (:string, :unsupported, :fixnum, :float, :percentage, :date,
#           :time, :date_time, :bignum). Anything else returns +value+
#           unchanged.
# options - :shared_strings, the array indexed by 's' cells.
#
# Date-like types return a Date when the value has no fractional part,
# otherwise a UTC Time. The time of day is rounded to the nearest second;
# truncating float products, as before, turned a stored 11:00 into 10:59
# and dropped the seconds.
#
# Raises ArgumentError when a date-like value does not start with an
# integer day count.
def self.cast(value, type, options = {})
  return nil if value.nil? || value.empty?

  case type

  ##
  # There are few built-in types
  ##

  when 's' # shared string
    options[:shared_strings][value.to_i]
  when 'n' # number
    value.to_f
  when 'b'
    value.to_i == 1
  when 'str', 'inlineStr'
    value

  ##
  # Type can also be determined by a style,
  # detected earlier and cast here by its standardized symbol
  ##

  when :string, :unsupported
    value
  when :fixnum
    value.to_i
  when :float
    value.to_f
  when :percentage
    # NOTE(review): divides the stored value by 100 -- confirm the stored
    # value is not already a fraction (0.5 for 50%).
    value.to_f / 100
  # the trickiest. note that all these formats can vary on
  # whether they actually contain a date, time, or datetime.
  when :date, :time, :date_time
    days_since_1900, fraction_of_24 = value.split('.')

    # http://stackoverflow.com/questions/10559767/how-to-convert-ms-excel-date-from-float-to-date-format-in-ruby
    date = Date.new(1899, 12, 30) + Integer(days_since_1900)
    return date unless fraction_of_24 # no time associated

    # Round to whole seconds so float noise cannot shave off a minute;
    # adding to midnight also carries a rounded-up 24:00:00 into the
    # next day.
    seconds = ("0.#{fraction_of_24}".to_f * 86_400).round
    Time.utc(date.year, date.month, date.day) + seconds
  when :bignum
    if defined?(BigDecimal)
      # Kernel#BigDecimal; BigDecimal.new is gone from current releases.
      BigDecimal(value)
    else
      value.to_f
    end

  ##
  # Beats me
  ##

  else
    value
  end
end
|
293
|
+
|
294
|
+
# Map of non-custom numFmtId to casting symbol
|
295
|
+
NumFmtMap = {
|
296
|
+
0 => :string, # General
|
297
|
+
1 => :fixnum, # 0
|
298
|
+
2 => :float, # 0.00
|
299
|
+
3 => :fixnum, # #,##0
|
300
|
+
4 => :float, # #,##0.00
|
301
|
+
5 => :unsupported, # $#,##0_);($#,##0)
|
302
|
+
6 => :unsupported, # $#,##0_);[Red]($#,##0)
|
303
|
+
7 => :unsupported, # $#,##0.00_);($#,##0.00)
|
304
|
+
8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
|
305
|
+
9 => :percentage, # 0%
|
306
|
+
10 => :percentage, # 0.00%
|
307
|
+
11 => :bignum, # 0.00E+00
|
308
|
+
12 => :unsupported, # # ?/?
|
309
|
+
13 => :unsupported, # # ??/??
|
310
|
+
14 => :date, # mm-dd-yy
|
311
|
+
15 => :date, # d-mmm-yy
|
312
|
+
16 => :date, # d-mmm
|
313
|
+
17 => :date, # mmm-yy
|
314
|
+
18 => :time, # h:mm AM/PM
|
315
|
+
19 => :time, # h:mm:ss AM/PM
|
316
|
+
20 => :time, # h:mm
|
317
|
+
21 => :time, # h:mm:ss
|
318
|
+
22 => :date_time, # m/d/yy h:mm
|
319
|
+
37 => :unsupported, # #,##0 ;(#,##0)
|
320
|
+
38 => :unsupported, # #,##0 ;[Red](#,##0)
|
321
|
+
39 => :unsupported, # #,##0.00;(#,##0.00)
|
322
|
+
40 => :unsupported, # #,##0.00;[Red](#,##0.00)
|
323
|
+
45 => :time, # mm:ss
|
324
|
+
46 => :time, # [h]:mm:ss
|
325
|
+
47 => :time, # mmss.0
|
326
|
+
48 => :bignum, # ##0.0E+0
|
327
|
+
49 => :unsupported # @
|
328
|
+
}
|
329
|
+
|
330
|
+
# For performance reasons, excel uses an optional SpreadsheetML feature
|
331
|
+
# that puts all strings in a separate xml file, and then references
|
332
|
+
# them by their index in that file.
|
333
|
+
#
|
334
|
+
# http://msdn.microsoft.com/en-us/library/office/gg278314.aspx
|
335
|
+
# Memoized array of shared strings, in the order the <si> elements appear
# in the shared strings xml. Empty when the workbook has no shared
# strings document.
def shared_strings
  @shared_strings ||=
    if (doc = xml.shared_strings)
      doc.xpath('/xmlns:sst/xmlns:si').map do |item|
        # a shared string can be a single value...
        plain = item.xpath('xmlns:t/text()').first

        if plain
          plain.text
        else
          # ... or a composite of separately styled words/characters
          item.xpath('xmlns:r/xmlns:t/text()').map(&:text).join
        end
      end
    else
      []
    end
end
|
350
|
+
|
351
|
+
end
|
352
|
+
|
353
|
+
end
|
354
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'simple_xlsx_reader/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |gem|
|
7
|
+
gem.name = "simple_xlsx_reader"
|
8
|
+
gem.version = SimpleXlsxReader::VERSION
|
9
|
+
gem.authors = ["Woody Peterson"]
|
10
|
+
gem.email = ["woody@sigby.com"]
|
11
|
+
gem.description = %q{Read xlsx data the Ruby way}
|
12
|
+
gem.summary = %q{Read xlsx data the Ruby way}
|
13
|
+
gem.homepage = ""
|
14
|
+
|
15
|
+
gem.add_dependency 'nokogiri'
|
16
|
+
gem.add_dependency 'rubyzip'
|
17
|
+
|
18
|
+
gem.add_development_dependency 'pry'
|
19
|
+
|
20
|
+
gem.files = `git ls-files`.split($/)
|
21
|
+
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
22
|
+
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
23
|
+
gem.require_paths = ["lib"]
|
24
|
+
end
|
Binary file
|
@@ -0,0 +1,80 @@
|
|
1
|
+
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="6" uniqueCount="5">
|
2
|
+
<si>
|
3
|
+
<t>Cell A1</t>
|
4
|
+
</si>
|
5
|
+
<si>
|
6
|
+
<t>Cell B1</t>
|
7
|
+
</si>
|
8
|
+
<si>
|
9
|
+
<t>My Cell</t>
|
10
|
+
</si>
|
11
|
+
<si>
|
12
|
+
<r>
|
13
|
+
<rPr>
|
14
|
+
<sz val="11"/>
|
15
|
+
<color rgb="FFFF0000"/>
|
16
|
+
<rFont val="Calibri"/>
|
17
|
+
<family val="2"/>
|
18
|
+
<scheme val="minor"/>
|
19
|
+
</rPr>
|
20
|
+
<t>Cell</t>
|
21
|
+
</r>
|
22
|
+
<r>
|
23
|
+
<rPr>
|
24
|
+
<sz val="11"/>
|
25
|
+
<color theme="1"/>
|
26
|
+
<rFont val="Calibri"/>
|
27
|
+
<family val="2"/>
|
28
|
+
<scheme val="minor"/>
|
29
|
+
</rPr>
|
30
|
+
<t xml:space="preserve"> </t>
|
31
|
+
</r>
|
32
|
+
<r>
|
33
|
+
<rPr>
|
34
|
+
<b/>
|
35
|
+
<sz val="11"/>
|
36
|
+
<color theme="1"/>
|
37
|
+
<rFont val="Calibri"/>
|
38
|
+
<family val="2"/>
|
39
|
+
<scheme val="minor"/>
|
40
|
+
</rPr>
|
41
|
+
<t>A2</t>
|
42
|
+
</r>
|
43
|
+
</si>
|
44
|
+
<si>
|
45
|
+
<r>
|
46
|
+
<rPr>
|
47
|
+
<sz val="11"/>
|
48
|
+
<color rgb="FF00B0F0"/>
|
49
|
+
<rFont val="Calibri"/>
|
50
|
+
<family val="2"/>
|
51
|
+
<scheme val="minor"/>
|
52
|
+
</rPr>
|
53
|
+
<t>Cell</t>
|
54
|
+
</r>
|
55
|
+
<r>
|
56
|
+
<rPr>
|
57
|
+
<sz val="11"/>
|
58
|
+
<color theme="1"/>
|
59
|
+
<rFont val="Calibri"/>
|
60
|
+
<family val="2"/>
|
61
|
+
<scheme val="minor"/>
|
62
|
+
</rPr>
|
63
|
+
<t xml:space="preserve"> </t>
|
64
|
+
</r>
|
65
|
+
<r>
|
66
|
+
<rPr>
|
67
|
+
<i/>
|
68
|
+
<sz val="11"/>
|
69
|
+
<color theme="1"/>
|
70
|
+
<rFont val="Calibri"/>
|
71
|
+
<family val="2"/>
|
72
|
+
<scheme val="minor"/>
|
73
|
+
</rPr>
|
74
|
+
<t>B2</t>
|
75
|
+
</r>
|
76
|
+
</si>
|
77
|
+
<si>
|
78
|
+
<t>Cell Fmt</t>
|
79
|
+
</si>
|
80
|
+
</sst>
|
@@ -0,0 +1,108 @@
|
|
1
|
+
require_relative 'test_helper'
|
2
|
+
require 'time'
|
3
|
+
|
4
|
+
describe SimpleXlsxReader do
|
5
|
+
let(:sesame_street_blog_file) { File.join(File.dirname(__FILE__),
|
6
|
+
'sesame_street_blog.xlsx') }
|
7
|
+
|
8
|
+
let(:subject) { SimpleXlsxReader::Document.new(sesame_street_blog_file) }
|
9
|
+
|
10
|
+
describe '#to_hash' do
|
11
|
+
it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
|
12
|
+
subject.to_hash.must_equal({
|
13
|
+
"Authors"=>
|
14
|
+
[["Name", "Occupation"],
|
15
|
+
["Big Bird", "Teacher"]],
|
16
|
+
|
17
|
+
"Posts"=>
|
18
|
+
[["Author Name", "Title", "Body", "Created At", "Comment Count"],
|
19
|
+
["Big Bird", "The Number 1", "The Greatest", Time.parse("2002-01-01 11:00:00 UTC"), 1],
|
20
|
+
["Big Bird", "The Number 2", "Second Best", Time.parse("2002-01-02 14:00:00 UTC"), 2]]
|
21
|
+
})
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
describe SimpleXlsxReader::Document::Mapper do
|
26
|
+
let(:described_class) { SimpleXlsxReader::Document::Mapper }
|
27
|
+
|
28
|
+
describe '::cast' do
|
29
|
+
it 'reads type s as a shared string' do
|
30
|
+
described_class.cast('1', 's', shared_strings: ['a', 'b', 'c']).
|
31
|
+
must_equal 'b'
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'reads type inlineStr as a string' do
|
35
|
+
xml = Nokogiri::XML(%( <c t="inlineStr"><is><t>the value</t></is></c> ))
|
36
|
+
described_class.cast(xml.text, 'inlineStr').must_equal 'the value'
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
describe '#shared_strings' do
|
41
|
+
let(:xml) do
|
42
|
+
SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
43
|
+
xml.shared_strings = Nokogiri::XML(File.read(
|
44
|
+
File.join(File.dirname(__FILE__), 'shared_strings.xml') ))
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
subject { described_class.new(xml) }
|
49
|
+
|
50
|
+
it 'parses strings formatted at the cell level' do
|
51
|
+
subject.shared_strings[0..2].must_equal ['Cell A1', 'Cell B1', 'My Cell']
|
52
|
+
end
|
53
|
+
|
54
|
+
it 'parses strings formatted at the character level' do
|
55
|
+
subject.shared_strings[3..5].must_equal ['Cell A2', 'Cell B2', 'Cell Fmt']
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
describe "parse errors" do
|
60
|
+
after do
|
61
|
+
SimpleXlsxReader.configuration.catch_cell_load_errors = false
|
62
|
+
end
|
63
|
+
|
64
|
+
let(:xml) do
|
65
|
+
SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
66
|
+
xml.sheets = [Nokogiri::XML(
|
67
|
+
<<-XML
|
68
|
+
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
69
|
+
<sheetData>
|
70
|
+
<row>
|
71
|
+
<c s='0'>
|
72
|
+
<v>14 is a date style; this is not a date</v>
|
73
|
+
</c>
|
74
|
+
</row>
|
75
|
+
</sheetData>
|
76
|
+
</worksheet>
|
77
|
+
XML
|
78
|
+
)]
|
79
|
+
|
80
|
+
# s='0' above refers to the value of numFmtId at cellXfs index 0
|
81
|
+
xml.styles = Nokogiri::XML(
|
82
|
+
<<-XML
|
83
|
+
<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
84
|
+
<cellXfs count="1">
|
85
|
+
<xf numFmtId="14" />
|
86
|
+
</cellXfs>
|
87
|
+
</styleSheet>
|
88
|
+
XML
|
89
|
+
)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
it 'raises if configuration.raise_on_parse_error' do
|
94
|
+
SimpleXlsxReader.configuration.catch_cell_load_errors = false
|
95
|
+
|
96
|
+
lambda { described_class.new(xml).parse_sheet('test', xml.sheets.first) }.
|
97
|
+
must_raise(SimpleXlsxReader::CellLoadError)
|
98
|
+
end
|
99
|
+
|
100
|
+
it 'records a load error if not configuration.raise_on_parse_error' do
|
101
|
+
SimpleXlsxReader.configuration.catch_cell_load_errors = true
|
102
|
+
|
103
|
+
sheet = described_class.new(xml).parse_sheet('test', xml.sheets.first)
|
104
|
+
sheet.load_errors[[0,0]].must_include 'invalid value for Integer'
|
105
|
+
end
|
106
|
+
end
|
107
|
+
end
|
108
|
+
end
|
data/test/test_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,110 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: simple_xlsx_reader
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Woody Peterson
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-01-16 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rubyzip
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: pry
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
description: Read xlsx data the Ruby way
|
63
|
+
email:
|
64
|
+
- woody@sigby.com
|
65
|
+
executables: []
|
66
|
+
extensions: []
|
67
|
+
extra_rdoc_files: []
|
68
|
+
files:
|
69
|
+
- .gitignore
|
70
|
+
- Gemfile
|
71
|
+
- LICENSE.txt
|
72
|
+
- README.md
|
73
|
+
- Rakefile
|
74
|
+
- lib/simple_xlsx_reader.rb
|
75
|
+
- lib/simple_xlsx_reader/version.rb
|
76
|
+
- simple_xlsx_reader.gemspec
|
77
|
+
- test/sesame_street_blog.xlsx
|
78
|
+
- test/shared_strings.xml
|
79
|
+
- test/simple_xlsx_reader_test.rb
|
80
|
+
- test/test_helper.rb
|
81
|
+
homepage: ''
|
82
|
+
licenses: []
|
83
|
+
post_install_message:
|
84
|
+
rdoc_options: []
|
85
|
+
require_paths:
|
86
|
+
- lib
|
87
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
88
|
+
none: false
|
89
|
+
requirements:
|
90
|
+
- - ! '>='
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
version: '0'
|
93
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
requirements: []
|
100
|
+
rubyforge_project:
|
101
|
+
rubygems_version: 1.8.24
|
102
|
+
signing_key:
|
103
|
+
specification_version: 3
|
104
|
+
summary: Read xlsx data the Ruby way
|
105
|
+
test_files:
|
106
|
+
- test/sesame_street_blog.xlsx
|
107
|
+
- test/shared_strings.xml
|
108
|
+
- test/simple_xlsx_reader_test.rb
|
109
|
+
- test/test_helper.rb
|
110
|
+
has_rdoc:
|