simple_xlsx_reader 0.9.8 → 1.0.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +12 -1
- data/lib/simple_xlsx_reader.rb +91 -64
- data/lib/simple_xlsx_reader/version.rb +1 -1
- data/simple_xlsx_reader.gemspec +1 -1
- data/test/datetime_test.rb +18 -0
- data/test/datetimes.xlsx +0 -0
- data/test/performance_test.rb +10 -10
- data/test/simple_xlsx_reader_test.rb +109 -22
- data/test/test_helper.rb +1 -1
- metadata +21 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4d24749c8713f2f57e47d33a363801333130c5df
|
4
|
+
data.tar.gz: 0b26df686d579163845fd6e06936bff930c52c33
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 523248f26fdf45c2978716836a2d81c5d3a4877bb74f29d7bfa51be1965e1ef61d1e67c83ec98bcac83f7b8e39ac08f666f6d31341428a5d0afa993e149ed7cd
|
7
|
+
data.tar.gz: 710789e5c5dd70d9d360e3a776662dc64138965b54984772cac9785018e2c9605a2b9ea3a2b889ec171183c64610901da527ae91989e9ea149d9016dc8497c71
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -97,7 +97,18 @@ This project follows [semantic versioning 1.0](http://semver.org/spec/v1.0.0.htm
|
|
97
97
|
|
98
98
|
## Contributing
|
99
99
|
|
100
|
-
|
100
|
+
Remember to write tests, think about edge cases, and run the existing
|
101
|
+
suite.
|
102
|
+
|
103
|
+
Note that as of commit 665cbafdde, the most extreme end of the
|
104
|
+
linear-time performance test, which is 10,000 rows (12 columns), runs in
|
105
|
+
~4 seconds on Ruby 2.1 on a 2012 MBP. If the linear time assertion fails
|
106
|
+
or you're way off that, there is probably a performance regression in
|
107
|
+
your code.
|
108
|
+
|
109
|
+
Then, the standard stuff:
|
110
|
+
|
111
|
+
1. Fork this project
|
101
112
|
2. Create your feature branch (`git checkout -b my-new-feature`)
|
102
113
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
103
114
|
4. Push to the branch (`git push origin my-new-feature`)
|
data/lib/simple_xlsx_reader.rb
CHANGED
@@ -74,13 +74,13 @@ module SimpleXlsxReader
|
|
74
74
|
def self.load(file_path)
|
75
75
|
self.new.tap do |xml|
|
76
76
|
SimpleXlsxReader::Zip.open(file_path) do |zip|
|
77
|
-
xml.workbook
|
78
|
-
xml.styles
|
77
|
+
xml.workbook = Nokogiri::XML(zip.read('xl/workbook.xml')).remove_namespaces!
|
78
|
+
xml.styles = Nokogiri::XML(zip.read('xl/styles.xml')).remove_namespaces!
|
79
79
|
|
80
80
|
# optional feature used by excel, but not often used by xlsx
|
81
81
|
# generation libraries
|
82
82
|
if zip.file.file?('xl/sharedStrings.xml')
|
83
|
-
xml.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml'))
|
83
|
+
xml.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml')).remove_namespaces!
|
84
84
|
end
|
85
85
|
|
86
86
|
xml.sheets = []
|
@@ -90,7 +90,7 @@ module SimpleXlsxReader
|
|
90
90
|
break if !zip.file.file?("xl/worksheets/sheet#{i}.xml")
|
91
91
|
|
92
92
|
xml.sheets <<
|
93
|
-
Nokogiri::XML(zip.read("xl/worksheets/sheet#{i}.xml"))
|
93
|
+
Nokogiri::XML(zip.read("xl/worksheets/sheet#{i}.xml")).remove_namespaces!
|
94
94
|
end
|
95
95
|
end
|
96
96
|
end
|
@@ -101,14 +101,14 @@ module SimpleXlsxReader
|
|
101
101
|
# For internal use; translates source xml to Sheet objects.
|
102
102
|
class Mapper < Struct.new(:xml)
|
103
103
|
def load_sheets
|
104
|
-
sheet_toc.each_with_index.map do |(sheet_name,
|
104
|
+
sheet_toc.each_with_index.map do |(sheet_name, _sheet_number), i|
|
105
105
|
parse_sheet(sheet_name, xml.sheets[i]) # sheet_number is *not* the index into xml.sheets
|
106
106
|
end
|
107
107
|
end
|
108
108
|
|
109
109
|
# Table of contents for the sheets, ex. {'Authors' => 0, ...}
|
110
110
|
def sheet_toc
|
111
|
-
xml.workbook.xpath('/
|
111
|
+
xml.workbook.xpath('/workbook/sheets/sheet').
|
112
112
|
inject({}) do |acc, sheet|
|
113
113
|
|
114
114
|
acc[sheet.attributes['name'].value] =
|
@@ -120,52 +120,55 @@ module SimpleXlsxReader
|
|
120
120
|
|
121
121
|
def parse_sheet(sheet_name, xsheet)
|
122
122
|
sheet = Sheet.new(sheet_name)
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
xcell.text.strip
|
164
|
-
end
|
123
|
+
sheet_width, sheet_height = *sheet_dimensions(xsheet)
|
124
|
+
|
125
|
+
sheet.rows = Array.new(sheet_height) { Array.new(sheet_width) }
|
126
|
+
xsheet.xpath("/worksheet/sheetData/row/c").each do |xcell|
|
127
|
+
column, row = *xcell.attr('r').match(/([A-Z]+)([0-9]+)/).captures
|
128
|
+
col_idx = column_letter_to_number(column) - 1
|
129
|
+
row_idx = row.to_i - 1
|
130
|
+
|
131
|
+
type = xcell.attributes['t'] &&
|
132
|
+
xcell.attributes['t'].value
|
133
|
+
style = xcell.attributes['s'] &&
|
134
|
+
style_types[xcell.attributes['s'].value.to_i]
|
135
|
+
|
136
|
+
# This is the main performance bottleneck. Using just 'xcell.text'
|
137
|
+
# would be ideal, and makes parsing super-fast. However, there's
|
138
|
+
# other junk in the cell, formula references in particular,
|
139
|
+
# so we really do have to look for specific value nodes.
|
140
|
+
# Maybe there is a really clever way to use xcell.text and parse out
|
141
|
+
# the correct value, but I can't think of one, or an alternative
|
142
|
+
# strategy.
|
143
|
+
#
|
144
|
+
# And yes, this really is faster than using xcell.at_xpath(...),
|
145
|
+
# by about 60%. Odd.
|
146
|
+
xvalue = type == 'inlineStr' ?
|
147
|
+
(xis = xcell.children.find {|c| c.name == 'is'}) && xis.children.find {|c| c.name == 't'} :
|
148
|
+
xcell.children.find {|c| c.name == 'v'}
|
149
|
+
|
150
|
+
cell = begin
|
151
|
+
self.class.cast(xvalue && xvalue.text.strip, type, style,
|
152
|
+
:shared_strings => shared_strings)
|
153
|
+
rescue => e
|
154
|
+
if !SimpleXlsxReader.configuration.catch_cell_load_errors
|
155
|
+
error = CellLoadError.new(
|
156
|
+
"Row #{row_idx}, Col #{col_idx}: #{e.message}")
|
157
|
+
error.set_backtrace(e.backtrace)
|
158
|
+
raise error
|
159
|
+
else
|
160
|
+
sheet.load_errors[[row_idx, col_idx]] = e.message
|
161
|
+
|
162
|
+
xcell.text.strip
|
165
163
|
end
|
166
164
|
end
|
167
165
|
|
168
|
-
|
166
|
+
# This shouldn't be necessary, but just in case, we'll create
|
167
|
+
# the row so we don't blow up. This means any null rows in between
|
168
|
+
# will be null instead of [null, null, ...]
|
169
|
+
sheet.rows[row_idx] ||= Array.new(sheet_width)
|
170
|
+
|
171
|
+
sheet.rows[row_idx][col_idx] = cell
|
169
172
|
end
|
170
173
|
|
171
174
|
sheet
|
@@ -180,17 +183,43 @@ module SimpleXlsxReader
|
|
180
183
|
# and check the column name of the last header row. Obviously this isn't
|
181
184
|
# the most robust strategy, but it likely fits 99% of use cases
|
182
185
|
# considering it's not a problem with actual excel docs.
|
183
|
-
def
|
184
|
-
dimension = xsheet.at_xpath('/
|
186
|
+
def last_cell_label(xsheet)
|
187
|
+
dimension = xsheet.at_xpath('/worksheet/dimension')
|
185
188
|
if dimension
|
186
|
-
col = dimension.attributes['ref'].value.match(/:([A-Z]
|
187
|
-
col ? col.captures.first : '
|
189
|
+
col = dimension.attributes['ref'].value.match(/:([A-Z]+[0-9]+)/)
|
190
|
+
col ? col.captures.first : 'A1'
|
188
191
|
else
|
189
|
-
last = xsheet.at_xpath("/
|
190
|
-
last ? last.attributes['r'].value.match(/([A-Z]
|
192
|
+
last = xsheet.at_xpath("/worksheet/sheetData/row[last()]/c[last()]")
|
193
|
+
last ? last.attributes['r'].value.match(/([A-Z]+[0-9]+)/).captures.first : 'A1'
|
191
194
|
end
|
192
195
|
end
|
193
196
|
|
197
|
+
# Returns dimensions (1-indexed)
|
198
|
+
def sheet_dimensions(xsheet)
|
199
|
+
column, row = *last_cell_label(xsheet).match(/([A-Z]+)([0-9]+)/).captures
|
200
|
+
[column_letter_to_number(column), row.to_i]
|
201
|
+
end
|
202
|
+
|
203
|
+
# formula fits an exponential factorial function of the form:
|
204
|
+
# 'A' = 1
|
205
|
+
# 'B' = 2
|
206
|
+
# 'Z' = 26
|
207
|
+
# 'AA' = 26 * 1 + 1
|
208
|
+
# 'AZ' = 26 * 1 + 26
|
209
|
+
# 'BA' = 26 * 2 + 1
|
210
|
+
# 'ZA' = 26 * 26 + 1
|
211
|
+
# 'ZZ' = 26 * 26 + 26
|
212
|
+
# 'AAA' = 26 * 26 * 1 + 26 * 1 + 1
|
213
|
+
# 'AAZ' = 26 * 26 * 1 + 26 * 1 + 26
|
214
|
+
# 'ABA' = 26 * 26 * 1 + 26 * 2 + 1
|
215
|
+
# 'BZA' = 26 * 26 * 2 + 26 * 26 + 1
|
216
|
+
def column_letter_to_number(column_letter)
|
217
|
+
pow = -1
|
218
|
+
column_letter.codepoints.reverse.inject(0) do |acc, charcode|
|
219
|
+
pow += 1
|
220
|
+
acc + 26**pow * (charcode - 64)
|
221
|
+
end
|
222
|
+
end
|
194
223
|
|
195
224
|
# Excel doesn't record types for some cells, only its display style, so
|
196
225
|
# we have to back out the type from that style.
|
@@ -208,7 +237,7 @@ module SimpleXlsxReader
|
|
208
237
|
# type.
|
209
238
|
def style_types
|
210
239
|
@style_types ||=
|
211
|
-
xml.styles.xpath('/
|
240
|
+
xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
|
212
241
|
style_type_by_num_fmt_id(xstyle.attributes['numFmtId'].value)}
|
213
242
|
end
|
214
243
|
|
@@ -229,7 +258,7 @@ module SimpleXlsxReader
|
|
229
258
|
# ex. {164 => :date_time}
|
230
259
|
def custom_style_types
|
231
260
|
@custom_style_types ||=
|
232
|
-
xml.styles.xpath('/
|
261
|
+
xml.styles.xpath('/styleSheet/numFmts/numFmt').
|
233
262
|
inject({}) do |acc, xstyle|
|
234
263
|
|
235
264
|
acc[xstyle.attributes['numFmtId'].value.to_i] =
|
@@ -321,11 +350,9 @@ module SimpleXlsxReader
|
|
321
350
|
|
322
351
|
if fraction_of_24 # there is a time associated
|
323
352
|
fraction_of_24 = "0.#{fraction_of_24}".to_f
|
324
|
-
|
325
|
-
hour = military.truncate
|
326
|
-
minute = ((military % 1) * 60).truncate
|
353
|
+
seconds = (fraction_of_24 * 86400).round
|
327
354
|
|
328
|
-
return Time.utc(date.year, date.month, date.day
|
355
|
+
return Time.utc(date.year, date.month, date.day) + seconds
|
329
356
|
else
|
330
357
|
return date
|
331
358
|
end
|
@@ -389,12 +416,12 @@ module SimpleXlsxReader
|
|
389
416
|
def shared_strings
|
390
417
|
@shared_strings ||= begin
|
391
418
|
if xml.shared_strings
|
392
|
-
xml.shared_strings.xpath('/
|
419
|
+
xml.shared_strings.xpath('/sst/si').map do |xsst|
|
393
420
|
# a shared string can be a single value...
|
394
|
-
sst = xsst.at_xpath('
|
421
|
+
sst = xsst.at_xpath('t/text()')
|
395
422
|
sst = sst.text if sst
|
396
423
|
# ... or a composite of seperately styled words/characters
|
397
|
-
sst ||= xsst.xpath('
|
424
|
+
sst ||= xsst.xpath('r/t/text()').map(&:text).join
|
398
425
|
end
|
399
426
|
else
|
400
427
|
[]
|
data/simple_xlsx_reader.gemspec
CHANGED
@@ -15,7 +15,7 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.add_dependency 'nokogiri'
|
16
16
|
gem.add_dependency 'rubyzip'
|
17
17
|
|
18
|
-
gem.add_development_dependency 'minitest'
|
18
|
+
gem.add_development_dependency 'minitest', '>= 5.0'
|
19
19
|
gem.add_development_dependency 'pry'
|
20
20
|
|
21
21
|
gem.files = `git ls-files`.split($/)
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
describe SimpleXlsxReader do
|
4
|
+
let(:datetimes_file) { File.join(File.dirname(__FILE__),
|
5
|
+
'datetimes.xlsx') }
|
6
|
+
|
7
|
+
let(:subject) { SimpleXlsxReader::Document.new(datetimes_file) }
|
8
|
+
|
9
|
+
it 'converts date_times with the correct precision' do
|
10
|
+
subject.to_hash.must_equal({
|
11
|
+
"Datetimes" =>
|
12
|
+
[[Time.parse("2013-08-19 18:29:59 UTC")],
|
13
|
+
[Time.parse("2013-08-19 18:30:00 UTC")],
|
14
|
+
[Time.parse("2013-08-19 18:30:01 UTC")]]
|
15
|
+
})
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
data/test/datetimes.xlsx
ADDED
Binary file
|
data/test/performance_test.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'test_helper'
|
2
2
|
require 'minitest/benchmark'
|
3
3
|
|
4
|
-
describe SimpleXlsxReader do
|
4
|
+
describe 'SimpleXlsxReader Benchmark' do
|
5
5
|
|
6
6
|
# n is 0-indexed for us, then converted to 1-indexed for excel
|
7
7
|
def build_row(n)
|
@@ -58,8 +58,8 @@ describe SimpleXlsxReader do
|
|
58
58
|
</sheetData>
|
59
59
|
</worksheet>
|
60
60
|
XML
|
61
|
-
)
|
62
|
-
base.at_xpath("/
|
61
|
+
).remove_namespaces!
|
62
|
+
base.at_xpath("/worksheet/sheetData").add_child(build_row(0))
|
63
63
|
|
64
64
|
@xml = SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
65
65
|
xml.sheets = [base]
|
@@ -76,7 +76,7 @@ describe SimpleXlsxReader do
|
|
76
76
|
</cellXfs>
|
77
77
|
</styleSheet>
|
78
78
|
XML
|
79
|
-
)
|
79
|
+
).remove_namespaces!
|
80
80
|
end
|
81
81
|
|
82
82
|
# Every new sheet has one more row
|
@@ -84,8 +84,8 @@ describe SimpleXlsxReader do
|
|
84
84
|
sheet = base.clone
|
85
85
|
|
86
86
|
range.times do |n|
|
87
|
-
sheet.xpath("/
|
88
|
-
add_next_sibling(build_row(n))
|
87
|
+
sheet.xpath("/worksheet/sheetData/row").last.
|
88
|
+
add_next_sibling(build_row(n+1))
|
89
89
|
end
|
90
90
|
|
91
91
|
@xml.sheets[range] = sheet
|
@@ -93,19 +93,19 @@ describe SimpleXlsxReader do
|
|
93
93
|
end
|
94
94
|
|
95
95
|
def self.bench_range
|
96
|
-
bench_exp(1,
|
96
|
+
bench_exp(1,10000)
|
97
97
|
end
|
98
98
|
|
99
99
|
bench_performance_linear 'parses sheets in linear time', 0.9999 do |n|
|
100
100
|
|
101
|
-
raise "not enough sample data; asked for #{n}, only have #{@xml.sheets.
|
101
|
+
raise "not enough sample data; asked for #{n}, only have #{@xml.sheets.size}"\
|
102
102
|
if @xml.sheets[n].nil?
|
103
103
|
|
104
104
|
sheet = SimpleXlsxReader::Document::Mapper.new(@xml).
|
105
105
|
parse_sheet('test', @xml.sheets[n])
|
106
106
|
|
107
|
-
raise "sheet didn't parse correctly; expected #{n + 1} rows, got #{sheet.rows.
|
108
|
-
if sheet.rows.
|
107
|
+
raise "sheet didn't parse correctly; expected #{n + 1} rows, got #{sheet.rows.size}"\
|
108
|
+
if sheet.rows.size != n + 1
|
109
109
|
end
|
110
110
|
|
111
111
|
end
|
@@ -44,12 +44,12 @@ describe SimpleXlsxReader do
|
|
44
44
|
end
|
45
45
|
|
46
46
|
it 'reads time styles' do
|
47
|
-
described_class.cast('41505.
|
47
|
+
described_class.cast('41505.77083', nil, :time).
|
48
48
|
must_equal Time.parse('2013-08-19 18:30 UTC')
|
49
49
|
end
|
50
50
|
|
51
51
|
it 'reads date_time styles' do
|
52
|
-
described_class.cast('41505.
|
52
|
+
described_class.cast('41505.77083', nil, :date_time).
|
53
53
|
must_equal Time.parse('2013-08-19 18:30 UTC')
|
54
54
|
end
|
55
55
|
|
@@ -59,12 +59,12 @@ describe SimpleXlsxReader do
|
|
59
59
|
end
|
60
60
|
|
61
61
|
it 'reads number types styled as times' do
|
62
|
-
described_class.cast('41505.
|
62
|
+
described_class.cast('41505.77083', 'n', :time).
|
63
63
|
must_equal Time.parse('2013-08-19 18:30 UTC')
|
64
64
|
end
|
65
65
|
|
66
66
|
it 'reads number types styled as date_times' do
|
67
|
-
described_class.cast('41505.
|
67
|
+
described_class.cast('41505.77083', 'n', :date_time).
|
68
68
|
must_equal Time.parse('2013-08-19 18:30 UTC')
|
69
69
|
end
|
70
70
|
end
|
@@ -73,7 +73,7 @@ describe SimpleXlsxReader do
|
|
73
73
|
let(:xml) do
|
74
74
|
SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
75
75
|
xml.shared_strings = Nokogiri::XML(File.read(
|
76
|
-
File.join(File.dirname(__FILE__), 'shared_strings.xml') ))
|
76
|
+
File.join(File.dirname(__FILE__), 'shared_strings.xml') )).remove_namespaces!
|
77
77
|
end
|
78
78
|
end
|
79
79
|
|
@@ -92,7 +92,7 @@ describe SimpleXlsxReader do
|
|
92
92
|
let(:xml) do
|
93
93
|
SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
94
94
|
xml.styles = Nokogiri::XML(File.read(
|
95
|
-
File.join(File.dirname(__FILE__), 'styles.xml') ))
|
95
|
+
File.join(File.dirname(__FILE__), 'styles.xml') )).remove_namespaces!
|
96
96
|
end
|
97
97
|
end
|
98
98
|
|
@@ -105,7 +105,7 @@ describe SimpleXlsxReader do
|
|
105
105
|
end
|
106
106
|
end
|
107
107
|
|
108
|
-
describe '#
|
108
|
+
describe '#last_cell_label' do
|
109
109
|
|
110
110
|
let(:generic_style) do
|
111
111
|
Nokogiri::XML(
|
@@ -116,7 +116,7 @@ describe SimpleXlsxReader do
|
|
116
116
|
</cellXfs>
|
117
117
|
</styleSheet>
|
118
118
|
XML
|
119
|
-
)
|
119
|
+
).remove_namespaces!
|
120
120
|
end
|
121
121
|
|
122
122
|
# Note, this is not a valid sheet, since the last cell is actually D1 but
|
@@ -141,7 +141,7 @@ describe SimpleXlsxReader do
|
|
141
141
|
</sheetData>
|
142
142
|
</worksheet>
|
143
143
|
XML
|
144
|
-
)
|
144
|
+
).remove_namespaces!
|
145
145
|
end
|
146
146
|
|
147
147
|
let(:empty_sheet) do
|
@@ -153,7 +153,7 @@ describe SimpleXlsxReader do
|
|
153
153
|
</sheetData>
|
154
154
|
</worksheet>
|
155
155
|
XML
|
156
|
-
)
|
156
|
+
).remove_namespaces!
|
157
157
|
end
|
158
158
|
|
159
159
|
let(:xml) do
|
@@ -166,21 +166,45 @@ describe SimpleXlsxReader do
|
|
166
166
|
subject { described_class.new(xml) }
|
167
167
|
|
168
168
|
it 'uses /worksheet/dimension if available' do
|
169
|
-
subject.
|
169
|
+
subject.last_cell_label(sheet).must_equal 'C1'
|
170
170
|
end
|
171
171
|
|
172
172
|
it 'uses the last header cell if /worksheet/dimension is missing' do
|
173
|
-
sheet.xpath('/
|
174
|
-
subject.
|
173
|
+
sheet.xpath('/worksheet/dimension').remove
|
174
|
+
subject.last_cell_label(sheet).must_equal 'D1'
|
175
175
|
end
|
176
176
|
|
177
|
-
it 'returns "
|
178
|
-
subject.
|
177
|
+
it 'returns "A1" if the dimension is just one cell' do
|
178
|
+
subject.last_cell_label(empty_sheet).must_equal 'A1'
|
179
179
|
end
|
180
180
|
|
181
|
-
it 'returns "
|
182
|
-
sheet.at_xpath('/
|
183
|
-
subject.
|
181
|
+
it 'returns "A1" if the sheet is just one cell, but /worksheet/dimension is missing' do
|
182
|
+
sheet.at_xpath('/worksheet/dimension').remove
|
183
|
+
subject.last_cell_label(empty_sheet).must_equal 'A1'
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
describe '#column_letter_to_number' do
|
188
|
+
let(:subject) { described_class.new }
|
189
|
+
|
190
|
+
[ ['A', 1 ],
|
191
|
+
['B', 2 ],
|
192
|
+
['Z', 26 ],
|
193
|
+
['AA', 27 ],
|
194
|
+
['AB', 28 ],
|
195
|
+
['AZ', 52 ],
|
196
|
+
['BA', 53 ],
|
197
|
+
['BZ', 78 ],
|
198
|
+
['ZZ', 702 ],
|
199
|
+
['AAA', 703 ],
|
200
|
+
['AAZ', 728 ],
|
201
|
+
['ABA', 729 ],
|
202
|
+
['ABZ', 754 ],
|
203
|
+
['AZZ', 1378 ],
|
204
|
+
['ZZZ', 18278] ].each do |(letter, number)|
|
205
|
+
it "converts #{letter} to #{number}" do
|
206
|
+
subject.column_letter_to_number(letter).must_equal number
|
207
|
+
end
|
184
208
|
end
|
185
209
|
end
|
186
210
|
|
@@ -204,7 +228,7 @@ describe SimpleXlsxReader do
|
|
204
228
|
</sheetData>
|
205
229
|
</worksheet>
|
206
230
|
XML
|
207
|
-
)]
|
231
|
+
).remove_namespaces!]
|
208
232
|
|
209
233
|
# s='0' above refers to the value of numFmtId at cellXfs index 0
|
210
234
|
xml.styles = Nokogiri::XML(
|
@@ -215,7 +239,7 @@ describe SimpleXlsxReader do
|
|
215
239
|
</cellXfs>
|
216
240
|
</styleSheet>
|
217
241
|
XML
|
218
|
-
)
|
242
|
+
).remove_namespaces!
|
219
243
|
end
|
220
244
|
end
|
221
245
|
|
@@ -264,7 +288,7 @@ describe SimpleXlsxReader do
|
|
264
288
|
</sheetData>
|
265
289
|
</worksheet>
|
266
290
|
XML
|
267
|
-
)]
|
291
|
+
).remove_namespaces!]
|
268
292
|
|
269
293
|
# s='0' above refers to the value of numFmtId at cellXfs index 0,
|
270
294
|
# which is in this case 'General' type
|
@@ -278,7 +302,7 @@ describe SimpleXlsxReader do
|
|
278
302
|
</cellXfs>
|
279
303
|
</styleSheet>
|
280
304
|
XML
|
281
|
-
)
|
305
|
+
).remove_namespaces!
|
282
306
|
end
|
283
307
|
end
|
284
308
|
|
@@ -318,5 +342,68 @@ describe SimpleXlsxReader do
|
|
318
342
|
@row[6].must_equal 'Cell G1'
|
319
343
|
end
|
320
344
|
end
|
345
|
+
|
346
|
+
describe 'parsing documents with blank rows' do
|
347
|
+
let(:xml) do
|
348
|
+
SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
349
|
+
xml.sheets = [Nokogiri::XML(
|
350
|
+
<<-XML
|
351
|
+
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
352
|
+
<dimension ref="A1:D7" />
|
353
|
+
<sheetData>
|
354
|
+
<row r="2" spans="1:1">
|
355
|
+
<c r="A2" s="0">
|
356
|
+
<v>0</v>
|
357
|
+
</c>
|
358
|
+
</row>
|
359
|
+
<row r="4" spans="1:1">
|
360
|
+
<c r="B4" s="0">
|
361
|
+
<v>1</v>
|
362
|
+
</c>
|
363
|
+
</row>
|
364
|
+
<row r="5" spans="1:1">
|
365
|
+
<c r="C5" s="0">
|
366
|
+
<v>2</v>
|
367
|
+
</c>
|
368
|
+
</row>
|
369
|
+
<row r="7" spans="1:1">
|
370
|
+
<c r="D7" s="0">
|
371
|
+
<v>3</v>
|
372
|
+
</c>
|
373
|
+
</row>
|
374
|
+
</sheetData>
|
375
|
+
</worksheet>
|
376
|
+
XML
|
377
|
+
).remove_namespaces!]
|
378
|
+
|
379
|
+
xml.styles = Nokogiri::XML(
|
380
|
+
<<-XML
|
381
|
+
<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
382
|
+
<cellXfs count="1">
|
383
|
+
<xf numFmtId="0" />
|
384
|
+
</cellXfs>
|
385
|
+
</styleSheet>
|
386
|
+
XML
|
387
|
+
).remove_namespaces!
|
388
|
+
end
|
389
|
+
end
|
390
|
+
|
391
|
+
before do
|
392
|
+
@rows = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows
|
393
|
+
end
|
394
|
+
|
395
|
+
it "reads row data despite gaps in row numbering" do
|
396
|
+
@rows.must_equal [
|
397
|
+
[nil,nil,nil,nil],
|
398
|
+
["0",nil,nil,nil],
|
399
|
+
[nil,nil,nil,nil],
|
400
|
+
[nil,"1",nil,nil],
|
401
|
+
[nil,nil,"2",nil],
|
402
|
+
[nil,nil,nil,nil],
|
403
|
+
[nil,nil,nil,"3"]
|
404
|
+
]
|
405
|
+
end
|
406
|
+
end
|
407
|
+
|
321
408
|
end
|
322
409
|
end
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,69 +1,69 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_xlsx_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0.pre
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Woody Peterson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-06-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rubyzip
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
47
|
+
version: '5.0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
54
|
+
version: '5.0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: pry
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
description: Read xlsx data the Ruby way
|
@@ -73,7 +73,7 @@ executables: []
|
|
73
73
|
extensions: []
|
74
74
|
extra_rdoc_files: []
|
75
75
|
files:
|
76
|
-
- .gitignore
|
76
|
+
- ".gitignore"
|
77
77
|
- CHANGELOG.md
|
78
78
|
- Gemfile
|
79
79
|
- LICENSE.txt
|
@@ -82,6 +82,8 @@ files:
|
|
82
82
|
- lib/simple_xlsx_reader.rb
|
83
83
|
- lib/simple_xlsx_reader/version.rb
|
84
84
|
- simple_xlsx_reader.gemspec
|
85
|
+
- test/datetime_test.rb
|
86
|
+
- test/datetimes.xlsx
|
85
87
|
- test/performance_test.rb
|
86
88
|
- test/sesame_street_blog.xlsx
|
87
89
|
- test/shared_strings.xml
|
@@ -97,21 +99,23 @@ require_paths:
|
|
97
99
|
- lib
|
98
100
|
required_ruby_version: !ruby/object:Gem::Requirement
|
99
101
|
requirements:
|
100
|
-
- -
|
102
|
+
- - ">="
|
101
103
|
- !ruby/object:Gem::Version
|
102
104
|
version: '0'
|
103
105
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
106
|
requirements:
|
105
|
-
- -
|
107
|
+
- - ">"
|
106
108
|
- !ruby/object:Gem::Version
|
107
|
-
version:
|
109
|
+
version: 1.3.1
|
108
110
|
requirements: []
|
109
111
|
rubyforge_project:
|
110
|
-
rubygems_version: 2.0
|
112
|
+
rubygems_version: 2.2.0
|
111
113
|
signing_key:
|
112
114
|
specification_version: 4
|
113
115
|
summary: Read xlsx data the Ruby way
|
114
116
|
test_files:
|
117
|
+
- test/datetime_test.rb
|
118
|
+
- test/datetimes.xlsx
|
115
119
|
- test/performance_test.rb
|
116
120
|
- test/sesame_street_blog.xlsx
|
117
121
|
- test/shared_strings.xml
|