simple_xlsx_reader 1.0.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.github/workflows/ruby.yml +38 -0
- data/.travis.yml +8 -0
- data/CHANGELOG.md +22 -0
- data/README.md +190 -57
- data/Rakefile +3 -1
- data/lib/simple_xlsx_reader/document.rb +147 -0
- data/lib/simple_xlsx_reader/hyperlink.rb +30 -0
- data/lib/simple_xlsx_reader/loader/shared_strings_parser.rb +46 -0
- data/lib/simple_xlsx_reader/loader/sheet_parser.rb +256 -0
- data/lib/simple_xlsx_reader/loader/style_types_parser.rb +115 -0
- data/lib/simple_xlsx_reader/loader/workbook_parser.rb +39 -0
- data/lib/simple_xlsx_reader/loader.rb +199 -0
- data/lib/simple_xlsx_reader/version.rb +3 -1
- data/lib/simple_xlsx_reader.rb +23 -442
- data/simple_xlsx_reader.gemspec +4 -2
- data/test/date1904_test.rb +5 -4
- data/test/datetime_test.rb +17 -10
- data/test/gdocs_sheet.xlsx +0 -0
- data/test/gdocs_sheet_test.rb +16 -0
- data/test/lower_case_sharedstrings_test.rb +9 -4
- data/test/performance_test.rb +86 -89
- data/test/sesame_street_blog.xlsx +0 -0
- data/test/shared_strings.xml +4 -0
- data/test/simple_xlsx_reader_test.rb +835 -320
- data/test/test_helper.rb +4 -1
- data/test/test_xlsx_builder.rb +104 -0
- metadata +38 -9
data/lib/simple_xlsx_reader.rb
CHANGED
@@ -1,7 +1,18 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require 'nokogiri'
|
3
4
|
require 'date'
|
4
5
|
|
6
|
+
require 'simple_xlsx_reader/version'
|
7
|
+
require 'simple_xlsx_reader/hyperlink'
|
8
|
+
require 'simple_xlsx_reader/document'
|
9
|
+
require 'simple_xlsx_reader/loader'
|
10
|
+
require 'simple_xlsx_reader/loader/workbook_parser'
|
11
|
+
require 'simple_xlsx_reader/loader/shared_strings_parser'
|
12
|
+
require 'simple_xlsx_reader/loader/sheet_parser'
|
13
|
+
require 'simple_xlsx_reader/loader/style_types_parser'
|
14
|
+
|
15
|
+
|
5
16
|
# Rubyzip 1.0 only has different naming, everything else is the same, so let's
|
6
17
|
# be flexible so we don't force people into a dependency hell w/ other gems.
|
7
18
|
begin
|
@@ -17,452 +28,22 @@ rescue LoadError
|
|
17
28
|
end
|
18
29
|
|
19
30
|
module SimpleXlsxReader
|
20
|
-
|
21
|
-
|
22
|
-
def self.configuration
|
23
|
-
@configuration ||= Struct.new(:catch_cell_load_errors).new.tap do |c|
|
24
|
-
c.catch_cell_load_errors = false
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
def self.open(file_path)
|
29
|
-
Document.new(file_path).tap(&:sheets)
|
30
|
-
end
|
31
|
-
|
32
|
-
class Document
|
33
|
-
attr_reader :file_path
|
34
|
-
|
35
|
-
def initialize(file_path)
|
36
|
-
@file_path = file_path
|
37
|
-
end
|
38
|
-
|
39
|
-
def sheets
|
40
|
-
@sheets ||= Mapper.new(xml).load_sheets
|
41
|
-
end
|
31
|
+
DATE_SYSTEM_1900 = Date.new(1899, 12, 30)
|
32
|
+
DATE_SYSTEM_1904 = Date.new(1904, 1, 1)
|
42
33
|
|
43
|
-
|
44
|
-
sheets.inject({}) {|acc, sheet| acc[sheet.name] = sheet.rows; acc}
|
45
|
-
end
|
46
|
-
|
47
|
-
def xml
|
48
|
-
Xml.load(file_path)
|
49
|
-
end
|
50
|
-
|
51
|
-
class Sheet < Struct.new(:name, :rows)
|
52
|
-
def headers
|
53
|
-
rows[0]
|
54
|
-
end
|
55
|
-
|
56
|
-
def data
|
57
|
-
rows[1..-1]
|
58
|
-
end
|
34
|
+
class CellLoadError < StandardError; end
|
59
35
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
@load_errors ||= {}
|
36
|
+
class << self
|
37
|
+
def configuration
|
38
|
+
@configuration ||= Struct.new(:catch_cell_load_errors, :auto_slurp).new.tap do |c|
|
39
|
+
c.catch_cell_load_errors = false
|
40
|
+
c.auto_slurp = false
|
66
41
|
end
|
67
42
|
end
|
68
43
|
|
69
|
-
|
70
|
-
|
71
|
-
class Xml
|
72
|
-
attr_accessor :workbook, :shared_strings, :sheets, :styles
|
73
|
-
|
74
|
-
def self.load(file_path)
|
75
|
-
self.new.tap do |xml|
|
76
|
-
SimpleXlsxReader::Zip.open(file_path) do |zip|
|
77
|
-
xml.workbook = Nokogiri::XML(zip.read('xl/workbook.xml')).remove_namespaces!
|
78
|
-
xml.styles = Nokogiri::XML(zip.read('xl/styles.xml')).remove_namespaces!
|
79
|
-
|
80
|
-
# optional feature used by excel, but not often used by xlsx
|
81
|
-
# generation libraries
|
82
|
-
ss_file = (zip.to_a.map(&:name) & ['xl/sharedStrings.xml','xl/sharedstrings.xml'])[0]
|
83
|
-
if ss_file
|
84
|
-
xml.shared_strings = Nokogiri::XML(zip.read(ss_file)).remove_namespaces!
|
85
|
-
end
|
86
|
-
|
87
|
-
xml.sheets = []
|
88
|
-
i = 0
|
89
|
-
loop do
|
90
|
-
i += 1
|
91
|
-
break if !zip.file.file?("xl/worksheets/sheet#{i}.xml")
|
92
|
-
|
93
|
-
xml.sheets <<
|
94
|
-
Nokogiri::XML(zip.read("xl/worksheets/sheet#{i}.xml")).remove_namespaces!
|
95
|
-
end
|
96
|
-
end
|
97
|
-
end
|
98
|
-
end
|
44
|
+
def open(file_path)
|
45
|
+
Document.new(file_path).tap(&:sheets)
|
99
46
|
end
|
100
|
-
|
101
|
-
##
|
102
|
-
# For internal use; translates source xml to Sheet objects.
|
103
|
-
class Mapper < Struct.new(:xml)
|
104
|
-
DATE_SYSTEM_1900 = Date.new(1899, 12, 30)
|
105
|
-
DATE_SYSTEM_1904 = Date.new(1904, 1, 1)
|
106
|
-
|
107
|
-
def load_sheets
|
108
|
-
sheet_toc.each_with_index.map do |(sheet_name, _sheet_number), i|
|
109
|
-
parse_sheet(sheet_name, xml.sheets[i]) # sheet_number is *not* the index into xml.sheets
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
# Table of contents for the sheets, ex. {'Authors' => 0, ...}
|
114
|
-
def sheet_toc
|
115
|
-
xml.workbook.xpath('/workbook/sheets/sheet').
|
116
|
-
inject({}) do |acc, sheet|
|
117
|
-
|
118
|
-
acc[sheet.attributes['name'].value] =
|
119
|
-
sheet.attributes['sheetId'].value.to_i - 1 # keep things 0-indexed
|
120
|
-
|
121
|
-
acc
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
def parse_sheet(sheet_name, xsheet)
|
126
|
-
sheet = Sheet.new(sheet_name)
|
127
|
-
sheet_width, sheet_height = *sheet_dimensions(xsheet)
|
128
|
-
|
129
|
-
sheet.rows = Array.new(sheet_height) { Array.new(sheet_width) }
|
130
|
-
xsheet.xpath("/worksheet/sheetData/row/c").each do |xcell|
|
131
|
-
column, row = *xcell.attr('r').match(/([A-Z]+)([0-9]+)/).captures
|
132
|
-
col_idx = column_letter_to_number(column) - 1
|
133
|
-
row_idx = row.to_i - 1
|
134
|
-
|
135
|
-
type = xcell.attributes['t'] &&
|
136
|
-
xcell.attributes['t'].value
|
137
|
-
style = xcell.attributes['s'] &&
|
138
|
-
style_types[xcell.attributes['s'].value.to_i]
|
139
|
-
|
140
|
-
# This is the main performance bottleneck. Using just 'xcell.text'
|
141
|
-
# would be ideal, and makes parsing super-fast. However, there's
|
142
|
-
# other junk in the cell, formula references in particular,
|
143
|
-
# so we really do have to look for specific value nodes.
|
144
|
-
# Maybe there is a really clever way to use xcell.text and parse out
|
145
|
-
# the correct value, but I can't think of one, or an alternative
|
146
|
-
# strategy.
|
147
|
-
#
|
148
|
-
# And yes, this really is faster than using xcell.at_xpath(...),
|
149
|
-
# by about 60%. Odd.
|
150
|
-
xvalue = type == 'inlineStr' ?
|
151
|
-
(xis = xcell.children.find {|c| c.name == 'is'}) && xis.children.find {|c| c.name == 't'} :
|
152
|
-
xcell.children.find {|c| c.name == 'v'}
|
153
|
-
|
154
|
-
cell = begin
|
155
|
-
self.class.cast(xvalue && xvalue.text.strip, type, style,
|
156
|
-
:shared_strings => shared_strings,
|
157
|
-
:base_date => base_date)
|
158
|
-
rescue => e
|
159
|
-
if !SimpleXlsxReader.configuration.catch_cell_load_errors
|
160
|
-
error = CellLoadError.new(
|
161
|
-
"Row #{row_idx}, Col #{col_idx}: #{e.message}")
|
162
|
-
error.set_backtrace(e.backtrace)
|
163
|
-
raise error
|
164
|
-
else
|
165
|
-
sheet.load_errors[[row_idx, col_idx]] = e.message
|
166
|
-
|
167
|
-
xcell.text.strip
|
168
|
-
end
|
169
|
-
end
|
170
|
-
|
171
|
-
# This shouldn't be necessary, but just in case, we'll create
|
172
|
-
# the row so we don't blow up. This means any null rows in between
|
173
|
-
# will be null instead of [null, null, ...]
|
174
|
-
sheet.rows[row_idx] ||= Array.new(sheet_width)
|
175
|
-
|
176
|
-
sheet.rows[row_idx][col_idx] = cell
|
177
|
-
end
|
178
|
-
|
179
|
-
sheet
|
180
|
-
end
|
181
|
-
|
182
|
-
##
|
183
|
-
# Returns the last column name, ex. 'E'
|
184
|
-
#
|
185
|
-
# Note that excel writes a '/worksheet/dimension' node we can get the
|
186
|
-
# last cell from, but some libs (ex. simple_xlsx_writer) don't record
|
187
|
-
# this. In that case, we assume the data is of uniform column length
|
188
|
-
# and check the column name of the last header row. Obviously this isn't
|
189
|
-
# the most robust strategy, but it likely fits 99% of use cases
|
190
|
-
# considering it's not a problem with actual excel docs.
|
191
|
-
def last_cell_label(xsheet)
|
192
|
-
dimension = xsheet.at_xpath('/worksheet/dimension')
|
193
|
-
if dimension
|
194
|
-
col = dimension.attributes['ref'].value.match(/:([A-Z]+[0-9]+)/)
|
195
|
-
col ? col.captures.first : 'A1'
|
196
|
-
else
|
197
|
-
last = xsheet.at_xpath("/worksheet/sheetData/row[last()]/c[last()]")
|
198
|
-
last ? last.attributes['r'].value.match(/([A-Z]+[0-9]+)/).captures.first : 'A1'
|
199
|
-
end
|
200
|
-
end
|
201
|
-
|
202
|
-
# Returns dimensions (1-indexed)
|
203
|
-
def sheet_dimensions(xsheet)
|
204
|
-
column, row = *last_cell_label(xsheet).match(/([A-Z]+)([0-9]+)/).captures
|
205
|
-
[column_letter_to_number(column), row.to_i]
|
206
|
-
end
|
207
|
-
|
208
|
-
# formula fits an exponential factorial function of the form:
|
209
|
-
# 'A' = 1
|
210
|
-
# 'B' = 2
|
211
|
-
# 'Z' = 26
|
212
|
-
# 'AA' = 26 * 1 + 1
|
213
|
-
# 'AZ' = 26 * 1 + 26
|
214
|
-
# 'BA' = 26 * 2 + 1
|
215
|
-
# 'ZA' = 26 * 26 + 1
|
216
|
-
# 'ZZ' = 26 * 26 + 26
|
217
|
-
# 'AAA' = 26 * 26 * 1 + 26 * 1 + 1
|
218
|
-
# 'AAZ' = 26 * 26 * 1 + 26 * 1 + 26
|
219
|
-
# 'ABA' = 26 * 26 * 1 + 26 * 2 + 1
|
220
|
-
# 'BZA' = 26 * 26 * 2 + 26 * 26 + 1
|
221
|
-
def column_letter_to_number(column_letter)
|
222
|
-
pow = column_letter.length - 1
|
223
|
-
result = 0
|
224
|
-
column_letter.each_byte do |b|
|
225
|
-
result += 26**pow * (b - 64)
|
226
|
-
pow -= 1
|
227
|
-
end
|
228
|
-
result
|
229
|
-
end
|
230
|
-
|
231
|
-
# Excel doesn't record types for some cells, only its display style, so
|
232
|
-
# we have to back out the type from that style.
|
233
|
-
#
|
234
|
-
# Some of these styles can be determined from a known set (see NumFmtMap),
|
235
|
-
# while others are 'custom' and we have to make a best guess.
|
236
|
-
#
|
237
|
-
# This is the array of types corresponding to the styles a spreadsheet
|
238
|
-
# uses, and includes both the known style types and the custom styles.
|
239
|
-
#
|
240
|
-
# Note that the xml sheet cells that use this don't reference the
|
241
|
-
# numFmtId, but instead the array index of a style in the stored list of
|
242
|
-
# only the styles used in the spreadsheet (which can be either known or
|
243
|
-
# custom). Hence this style types array, rather than a map of numFmtId to
|
244
|
-
# type.
|
245
|
-
def style_types
|
246
|
-
@style_types ||=
|
247
|
-
xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
|
248
|
-
style_type_by_num_fmt_id(num_fmt_id(xstyle))}
|
249
|
-
end
|
250
|
-
|
251
|
-
#returns the numFmtId value if it's available
|
252
|
-
def num_fmt_id(xstyle)
|
253
|
-
if xstyle.attributes['numFmtId']
|
254
|
-
xstyle.attributes['numFmtId'].value
|
255
|
-
else
|
256
|
-
nil
|
257
|
-
end
|
258
|
-
end
|
259
|
-
|
260
|
-
# Finds the type we think a style is; For example, fmtId 14 is a date
|
261
|
-
# style, so this would return :date.
|
262
|
-
#
|
263
|
-
# Note, custom styles usually (are supposed to?) have a numFmtId >= 164,
|
264
|
-
# but in practice can sometimes be simply out of the usual "Any Language"
|
265
|
-
# id range that goes up to 49. For example, I have seen a numFmtId of
|
266
|
-
# 59 specified as a date. In Thai, 59 is a number format, so this seems
|
267
|
-
# like a bad idea, but we try to be flexible and just go with it.
|
268
|
-
def style_type_by_num_fmt_id(id)
|
269
|
-
return nil if id.nil?
|
270
|
-
|
271
|
-
id = id.to_i
|
272
|
-
NumFmtMap[id] || custom_style_types[id]
|
273
|
-
end
|
274
|
-
|
275
|
-
# Map of (numFmtId >= 164) (custom styles) to our best guess at the type
|
276
|
-
# ex. {164 => :date_time}
|
277
|
-
def custom_style_types
|
278
|
-
@custom_style_types ||=
|
279
|
-
xml.styles.xpath('/styleSheet/numFmts/numFmt').
|
280
|
-
inject({}) do |acc, xstyle|
|
281
|
-
|
282
|
-
acc[xstyle.attributes['numFmtId'].value.to_i] =
|
283
|
-
determine_custom_style_type(xstyle.attributes['formatCode'].value)
|
284
|
-
|
285
|
-
acc
|
286
|
-
end
|
287
|
-
end
|
288
|
-
|
289
|
-
# This is the least deterministic part of reading xlsx files. Due to
|
290
|
-
# custom styles, you can't know for sure when a date is a date other than
|
291
|
-
# looking at its format and gessing. It's not impossible to guess right,
|
292
|
-
# though.
|
293
|
-
#
|
294
|
-
# http://stackoverflow.com/questions/4948998/determining-if-an-xlsx-cell-is-date-formatted-for-excel-2007-spreadsheets
|
295
|
-
def determine_custom_style_type(string)
|
296
|
-
return :float if string[0] == '_'
|
297
|
-
return :float if string[0] == ' 0'
|
298
|
-
|
299
|
-
# Looks for one of ymdhis outside of meta-stuff like [Red]
|
300
|
-
return :date_time if string =~ /(^|\])[^\[]*[ymdhis]/i
|
301
|
-
|
302
|
-
return :unsupported
|
303
|
-
end
|
304
|
-
|
305
|
-
##
|
306
|
-
# The heart of typecasting. The ruby type is determined either explicitly
|
307
|
-
# from the cell xml or implicitly from the cell style, and this
|
308
|
-
# method expects that work to have been done already. This, then,
|
309
|
-
# takes the type we determined it to be and casts the cell value
|
310
|
-
# to that type.
|
311
|
-
#
|
312
|
-
# types:
|
313
|
-
# - s: shared string (see #shared_string)
|
314
|
-
# - n: number (cast to a float)
|
315
|
-
# - b: boolean
|
316
|
-
# - str: string
|
317
|
-
# - inlineStr: string
|
318
|
-
# - ruby symbol: for when type has been determined by style
|
319
|
-
#
|
320
|
-
# options:
|
321
|
-
# - shared_strings: needed for 's' (shared string) type
|
322
|
-
def self.cast(value, type, style, options = {})
|
323
|
-
return nil if value.nil? || value.empty?
|
324
|
-
|
325
|
-
# Sometimes the type is dictated by the style alone
|
326
|
-
if type.nil? ||
|
327
|
-
(type == 'n' && [:date, :time, :date_time].include?(style))
|
328
|
-
type = style
|
329
|
-
end
|
330
|
-
|
331
|
-
case type
|
332
|
-
|
333
|
-
##
|
334
|
-
# There are few built-in types
|
335
|
-
##
|
336
|
-
|
337
|
-
when 's' # shared string
|
338
|
-
options[:shared_strings][value.to_i]
|
339
|
-
when 'n' # number
|
340
|
-
value.to_f
|
341
|
-
when 'b'
|
342
|
-
value.to_i == 1
|
343
|
-
when 'str'
|
344
|
-
value
|
345
|
-
when 'inlineStr'
|
346
|
-
value
|
347
|
-
|
348
|
-
##
|
349
|
-
# Type can also be determined by a style,
|
350
|
-
# detected earlier and cast here by its standardized symbol
|
351
|
-
##
|
352
|
-
|
353
|
-
when :string, :unsupported
|
354
|
-
value
|
355
|
-
when :fixnum
|
356
|
-
value.to_i
|
357
|
-
when :float
|
358
|
-
value.to_f
|
359
|
-
when :percentage
|
360
|
-
value.to_f / 100
|
361
|
-
# the trickiest. note that all these formats can vary on
|
362
|
-
# whether they actually contain a date, time, or datetime.
|
363
|
-
when :date, :time, :date_time
|
364
|
-
value = value.to_f
|
365
|
-
days_since_date_system_start = value.to_i
|
366
|
-
fraction_of_24 = value - days_since_date_system_start
|
367
|
-
|
368
|
-
# http://stackoverflow.com/questions/10559767/how-to-convert-ms-excel-date-from-float-to-date-format-in-ruby
|
369
|
-
date = options.fetch(:base_date, DATE_SYSTEM_1900) + days_since_date_system_start
|
370
|
-
|
371
|
-
if fraction_of_24 > 0 # there is a time associated
|
372
|
-
seconds = (fraction_of_24 * 86400).round
|
373
|
-
return Time.utc(date.year, date.month, date.day) + seconds
|
374
|
-
else
|
375
|
-
return date
|
376
|
-
end
|
377
|
-
when :bignum
|
378
|
-
if defined?(BigDecimal)
|
379
|
-
BigDecimal.new(value)
|
380
|
-
else
|
381
|
-
value.to_f
|
382
|
-
end
|
383
|
-
|
384
|
-
##
|
385
|
-
# Beats me
|
386
|
-
##
|
387
|
-
|
388
|
-
else
|
389
|
-
value
|
390
|
-
end
|
391
|
-
end
|
392
|
-
|
393
|
-
## Returns the base_date from which to calculate dates.
|
394
|
-
# Defaults to 1900 (minus two days due to excel quirk), but use 1904 if
|
395
|
-
# it's set in the Workbook's workbookPr.
|
396
|
-
# http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
|
397
|
-
def base_date
|
398
|
-
@base_date ||=
|
399
|
-
begin
|
400
|
-
return DATE_SYSTEM_1900 if xml.workbook == nil
|
401
|
-
xml.workbook.xpath("//workbook/workbookPr[@date1904]").each do |workbookPr|
|
402
|
-
return DATE_SYSTEM_1904 if workbookPr["date1904"] =~ /true|1/i
|
403
|
-
end
|
404
|
-
DATE_SYSTEM_1900
|
405
|
-
end
|
406
|
-
end
|
407
|
-
|
408
|
-
# Map of non-custom numFmtId to casting symbol
|
409
|
-
NumFmtMap = {
|
410
|
-
0 => :string, # General
|
411
|
-
1 => :fixnum, # 0
|
412
|
-
2 => :float, # 0.00
|
413
|
-
3 => :fixnum, # #,##0
|
414
|
-
4 => :float, # #,##0.00
|
415
|
-
5 => :unsupported, # $#,##0_);($#,##0)
|
416
|
-
6 => :unsupported, # $#,##0_);[Red]($#,##0)
|
417
|
-
7 => :unsupported, # $#,##0.00_);($#,##0.00)
|
418
|
-
8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
|
419
|
-
9 => :percentage, # 0%
|
420
|
-
10 => :percentage, # 0.00%
|
421
|
-
11 => :bignum, # 0.00E+00
|
422
|
-
12 => :unsupported, # # ?/?
|
423
|
-
13 => :unsupported, # # ??/??
|
424
|
-
14 => :date, # mm-dd-yy
|
425
|
-
15 => :date, # d-mmm-yy
|
426
|
-
16 => :date, # d-mmm
|
427
|
-
17 => :date, # mmm-yy
|
428
|
-
18 => :time, # h:mm AM/PM
|
429
|
-
19 => :time, # h:mm:ss AM/PM
|
430
|
-
20 => :time, # h:mm
|
431
|
-
21 => :time, # h:mm:ss
|
432
|
-
22 => :date_time, # m/d/yy h:mm
|
433
|
-
37 => :unsupported, # #,##0 ;(#,##0)
|
434
|
-
38 => :unsupported, # #,##0 ;[Red](#,##0)
|
435
|
-
39 => :unsupported, # #,##0.00;(#,##0.00)
|
436
|
-
40 => :unsupported, # #,##0.00;[Red](#,##0.00)
|
437
|
-
45 => :time, # mm:ss
|
438
|
-
46 => :time, # [h]:mm:ss
|
439
|
-
47 => :time, # mmss.0
|
440
|
-
48 => :bignum, # ##0.0E+0
|
441
|
-
49 => :unsupported # @
|
442
|
-
}
|
443
|
-
|
444
|
-
# For performance reasons, excel uses an optional SpreadsheetML feature
|
445
|
-
# that puts all strings in a separate xml file, and then references
|
446
|
-
# them by their index in that file.
|
447
|
-
#
|
448
|
-
# http://msdn.microsoft.com/en-us/library/office/gg278314.aspx
|
449
|
-
def shared_strings
|
450
|
-
@shared_strings ||= begin
|
451
|
-
if xml.shared_strings
|
452
|
-
xml.shared_strings.xpath('/sst/si').map do |xsst|
|
453
|
-
# a shared string can be a single value...
|
454
|
-
sst = xsst.at_xpath('t/text()')
|
455
|
-
sst = sst.text if sst
|
456
|
-
# ... or a composite of seperately styled words/characters
|
457
|
-
sst ||= xsst.xpath('r/t/text()').map(&:text).join
|
458
|
-
end
|
459
|
-
else
|
460
|
-
[]
|
461
|
-
end
|
462
|
-
end
|
463
|
-
end
|
464
|
-
|
465
|
-
end
|
466
|
-
|
47
|
+
alias parse open
|
467
48
|
end
|
468
49
|
end
|
data/simple_xlsx_reader.gemspec
CHANGED
@@ -7,19 +7,21 @@ Gem::Specification.new do |gem|
|
|
7
7
|
gem.name = "simple_xlsx_reader"
|
8
8
|
gem.version = SimpleXlsxReader::VERSION
|
9
9
|
gem.authors = ["Woody Peterson"]
|
10
|
-
gem.email = ["woody@
|
10
|
+
gem.email = ["woody.peterson@gmail.com"]
|
11
11
|
gem.description = %q{Read xlsx data the Ruby way}
|
12
12
|
gem.summary = %q{Read xlsx data the Ruby way}
|
13
13
|
gem.homepage = ""
|
14
|
+
gem.license = "MIT"
|
14
15
|
|
15
16
|
gem.add_dependency 'nokogiri'
|
16
17
|
gem.add_dependency 'rubyzip'
|
17
18
|
|
18
19
|
gem.add_development_dependency 'minitest', '>= 5.0'
|
20
|
+
gem.add_development_dependency 'rake'
|
19
21
|
gem.add_development_dependency 'pry'
|
20
22
|
|
21
23
|
gem.files = `git ls-files`.split($/)
|
22
24
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
23
|
-
gem.test_files = gem.files.grep(%r{^
|
25
|
+
gem.test_files = gem.files.grep(%r{^test/})
|
24
26
|
gem.require_paths = ["lib"]
|
25
27
|
end
|
data/test/date1904_test.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'test_helper'
|
2
4
|
|
3
5
|
describe SimpleXlsxReader do
|
@@ -5,9 +7,8 @@ describe SimpleXlsxReader do
|
|
5
7
|
let(:subject) { SimpleXlsxReader::Document.new(date1904_file) }
|
6
8
|
|
7
9
|
it 'supports converting dates with the 1904 date system' do
|
8
|
-
subject.to_hash.must_equal(
|
9
|
-
|
10
|
-
|
10
|
+
_(subject.to_hash).must_equal(
|
11
|
+
'date1904' => [[Date.parse('2014-05-01')]]
|
12
|
+
)
|
11
13
|
end
|
12
|
-
|
13
14
|
end
|
data/test/datetime_test.rb
CHANGED
@@ -1,19 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'test_helper'
|
2
4
|
|
3
5
|
describe SimpleXlsxReader do
|
4
|
-
let(:datetimes_file)
|
5
|
-
|
6
|
+
let(:datetimes_file) do
|
7
|
+
File.join(
|
8
|
+
File.dirname(__FILE__),
|
9
|
+
'datetimes.xlsx'
|
10
|
+
)
|
11
|
+
end
|
6
12
|
|
7
13
|
let(:subject) { SimpleXlsxReader::Document.new(datetimes_file) }
|
8
14
|
|
9
15
|
it 'converts date_times with the correct precision' do
|
10
|
-
subject.to_hash.must_equal(
|
11
|
-
|
12
|
-
[
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
16
|
+
_(subject.to_hash).must_equal(
|
17
|
+
'Datetimes' =>
|
18
|
+
[
|
19
|
+
[Time.parse('2013-08-19 18:29:59 UTC')],
|
20
|
+
[Time.parse('2013-08-19 18:30:00 UTC')],
|
21
|
+
[Time.parse('2013-08-19 18:30:01 UTC')],
|
22
|
+
[Time.parse('1899-12-30 00:30:00 UTC')]
|
23
|
+
]
|
24
|
+
)
|
17
25
|
end
|
18
|
-
|
19
26
|
end
|
data/test/gdocs_sheet.xlsx
Binary file
|
data/test/gdocs_sheet_test.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'test_helper'
|
4
|
+
require 'time'
|
5
|
+
|
6
|
+
describe SimpleXlsxReader do
|
7
|
+
let(:one_sheet_file) { File.join(File.dirname(__FILE__), 'gdocs_sheet.xlsx') }
|
8
|
+
let(:subject) { SimpleXlsxReader::Document.new(one_sheet_file) }
|
9
|
+
|
10
|
+
it 'able to load file from google docs' do
|
11
|
+
_(subject.to_hash).must_equal(
|
12
|
+
'List 1' => [['Empty gdocs list 1']],
|
13
|
+
'List 2' => [['Empty gdocs list 2']]
|
14
|
+
)
|
15
|
+
end
|
16
|
+
end
|
data/test/lower_case_sharedstrings_test.rb
CHANGED
@@ -1,15 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'test_helper'
|
2
4
|
|
3
5
|
describe SimpleXlsxReader do
|
4
|
-
let(:lower_case_shared_strings)
|
5
|
-
|
6
|
+
let(:lower_case_shared_strings) do
|
7
|
+
File.join(
|
8
|
+
File.dirname(__FILE__),
|
9
|
+
'lower_case_sharedstrings.xlsx'
|
10
|
+
)
|
11
|
+
end
|
6
12
|
|
7
13
|
let(:subject) { SimpleXlsxReader::Document.new(lower_case_shared_strings) }
|
8
14
|
|
9
|
-
|
10
15
|
describe '#to_hash' do
|
11
16
|
it 'should have the word Well in the first row' do
|
12
|
-
subject.sheets.first.rows[0].must_include('Well')
|
17
|
+
_(subject.sheets.first.rows.to_a[0]).must_include('Well')
|
13
18
|
end
|
14
19
|
end
|
15
20
|
end
|