simple_xlsx_reader 0.9.8 → 1.0.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +7 -0
- data/README.md +12 -1
- data/lib/simple_xlsx_reader.rb +91 -64
- data/lib/simple_xlsx_reader/version.rb +1 -1
- data/simple_xlsx_reader.gemspec +1 -1
- data/test/datetime_test.rb +18 -0
- data/test/datetimes.xlsx +0 -0
- data/test/performance_test.rb +10 -10
- data/test/simple_xlsx_reader_test.rb +109 -22
- data/test/test_helper.rb +1 -1
- metadata +21 -17
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4d24749c8713f2f57e47d33a363801333130c5df
|
4
|
+
data.tar.gz: 0b26df686d579163845fd6e06936bff930c52c33
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 523248f26fdf45c2978716836a2d81c5d3a4877bb74f29d7bfa51be1965e1ef61d1e67c83ec98bcac83f7b8e39ac08f666f6d31341428a5d0afa993e149ed7cd
|
7
|
+
data.tar.gz: 710789e5c5dd70d9d360e3a776662dc64138965b54984772cac9785018e2c9605a2b9ea3a2b889ec171183c64610901da527ae91989e9ea149d9016dc8497c71
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -97,7 +97,18 @@ This project follows [semantic versioning 1.0](http://semver.org/spec/v1.0.0.htm
|
|
97
97
|
|
98
98
|
## Contributing
|
99
99
|
|
100
|
-
|
100
|
+
Remember to write tests, think about edge cases, and run the existing
|
101
|
+
suite.
|
102
|
+
|
103
|
+
Note that as of commit 665cbafdde, the most extreme end of the
|
104
|
+
linear-time performance test, which is 10,000 rows (12 columns), runs in
|
105
|
+
~4 seconds on Ruby 2.1 on a 2012 MBP. If the linear time assertion fails
|
106
|
+
or you're way off that, there is probably a performance regression in
|
107
|
+
your code.
|
108
|
+
|
109
|
+
Then, the standard stuff:
|
110
|
+
|
111
|
+
1. Fork this project
|
101
112
|
2. Create your feature branch (`git checkout -b my-new-feature`)
|
102
113
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
103
114
|
4. Push to the branch (`git push origin my-new-feature`)
|
data/lib/simple_xlsx_reader.rb
CHANGED
@@ -74,13 +74,13 @@ module SimpleXlsxReader
|
|
74
74
|
def self.load(file_path)
|
75
75
|
self.new.tap do |xml|
|
76
76
|
SimpleXlsxReader::Zip.open(file_path) do |zip|
|
77
|
-
xml.workbook
|
78
|
-
xml.styles
|
77
|
+
xml.workbook = Nokogiri::XML(zip.read('xl/workbook.xml')).remove_namespaces!
|
78
|
+
xml.styles = Nokogiri::XML(zip.read('xl/styles.xml')).remove_namespaces!
|
79
79
|
|
80
80
|
# optional feature used by excel, but not often used by xlsx
|
81
81
|
# generation libraries
|
82
82
|
if zip.file.file?('xl/sharedStrings.xml')
|
83
|
-
xml.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml'))
|
83
|
+
xml.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml')).remove_namespaces!
|
84
84
|
end
|
85
85
|
|
86
86
|
xml.sheets = []
|
@@ -90,7 +90,7 @@ module SimpleXlsxReader
|
|
90
90
|
break if !zip.file.file?("xl/worksheets/sheet#{i}.xml")
|
91
91
|
|
92
92
|
xml.sheets <<
|
93
|
-
Nokogiri::XML(zip.read("xl/worksheets/sheet#{i}.xml"))
|
93
|
+
Nokogiri::XML(zip.read("xl/worksheets/sheet#{i}.xml")).remove_namespaces!
|
94
94
|
end
|
95
95
|
end
|
96
96
|
end
|
@@ -101,14 +101,14 @@ module SimpleXlsxReader
|
|
101
101
|
# For internal use; translates source xml to Sheet objects.
|
102
102
|
class Mapper < Struct.new(:xml)
|
103
103
|
def load_sheets
|
104
|
-
sheet_toc.each_with_index.map do |(sheet_name,
|
104
|
+
sheet_toc.each_with_index.map do |(sheet_name, _sheet_number), i|
|
105
105
|
parse_sheet(sheet_name, xml.sheets[i]) # sheet_number is *not* the index into xml.sheets
|
106
106
|
end
|
107
107
|
end
|
108
108
|
|
109
109
|
# Table of contents for the sheets, ex. {'Authors' => 0, ...}
|
110
110
|
def sheet_toc
|
111
|
-
xml.workbook.xpath('/
|
111
|
+
xml.workbook.xpath('/workbook/sheets/sheet').
|
112
112
|
inject({}) do |acc, sheet|
|
113
113
|
|
114
114
|
acc[sheet.attributes['name'].value] =
|
@@ -120,52 +120,55 @@ module SimpleXlsxReader
|
|
120
120
|
|
121
121
|
def parse_sheet(sheet_name, xsheet)
|
122
122
|
sheet = Sheet.new(sheet_name)
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
xcell.text.strip
|
164
|
-
end
|
123
|
+
sheet_width, sheet_height = *sheet_dimensions(xsheet)
|
124
|
+
|
125
|
+
sheet.rows = Array.new(sheet_height) { Array.new(sheet_width) }
|
126
|
+
xsheet.xpath("/worksheet/sheetData/row/c").each do |xcell|
|
127
|
+
column, row = *xcell.attr('r').match(/([A-Z]+)([0-9]+)/).captures
|
128
|
+
col_idx = column_letter_to_number(column) - 1
|
129
|
+
row_idx = row.to_i - 1
|
130
|
+
|
131
|
+
type = xcell.attributes['t'] &&
|
132
|
+
xcell.attributes['t'].value
|
133
|
+
style = xcell.attributes['s'] &&
|
134
|
+
style_types[xcell.attributes['s'].value.to_i]
|
135
|
+
|
136
|
+
# This is the main performance bottleneck. Using just 'xcell.text'
|
137
|
+
# would be ideal, and makes parsing super-fast. However, there's
|
138
|
+
# other junk in the cell, formula references in particular,
|
139
|
+
# so we really do have to look for specific value nodes.
|
140
|
+
# Maybe there is a really clever way to use xcell.text and parse out
|
141
|
+
# the correct value, but I can't think of one, or an alternative
|
142
|
+
# strategy.
|
143
|
+
#
|
144
|
+
# And yes, this really is faster than using xcell.at_xpath(...),
|
145
|
+
# by about 60%. Odd.
|
146
|
+
xvalue = type == 'inlineStr' ?
|
147
|
+
(xis = xcell.children.find {|c| c.name == 'is'}) && xis.children.find {|c| c.name == 't'} :
|
148
|
+
xcell.children.find {|c| c.name == 'v'}
|
149
|
+
|
150
|
+
cell = begin
|
151
|
+
self.class.cast(xvalue && xvalue.text.strip, type, style,
|
152
|
+
:shared_strings => shared_strings)
|
153
|
+
rescue => e
|
154
|
+
if !SimpleXlsxReader.configuration.catch_cell_load_errors
|
155
|
+
error = CellLoadError.new(
|
156
|
+
"Row #{row_idx}, Col #{col_idx}: #{e.message}")
|
157
|
+
error.set_backtrace(e.backtrace)
|
158
|
+
raise error
|
159
|
+
else
|
160
|
+
sheet.load_errors[[row_idx, col_idx]] = e.message
|
161
|
+
|
162
|
+
xcell.text.strip
|
165
163
|
end
|
166
164
|
end
|
167
165
|
|
168
|
-
|
166
|
+
# This shouldn't be necessary, but just in case, we'll create
|
167
|
+
# the row so we don't blow up. This means any null rows in between
|
168
|
+
# will be null instead of [null, null, ...]
|
169
|
+
sheet.rows[row_idx] ||= Array.new(sheet_width)
|
170
|
+
|
171
|
+
sheet.rows[row_idx][col_idx] = cell
|
169
172
|
end
|
170
173
|
|
171
174
|
sheet
|
@@ -180,17 +183,43 @@ module SimpleXlsxReader
|
|
180
183
|
# and check the column name of the last header row. Obviously this isn't
|
181
184
|
# the most robust strategy, but it likely fits 99% of use cases
|
182
185
|
# considering it's not a problem with actual excel docs.
|
183
|
-
def
|
184
|
-
dimension = xsheet.at_xpath('/
|
186
|
+
def last_cell_label(xsheet)
|
187
|
+
dimension = xsheet.at_xpath('/worksheet/dimension')
|
185
188
|
if dimension
|
186
|
-
col = dimension.attributes['ref'].value.match(/:([A-Z]
|
187
|
-
col ? col.captures.first : '
|
189
|
+
col = dimension.attributes['ref'].value.match(/:([A-Z]+[0-9]+)/)
|
190
|
+
col ? col.captures.first : 'A1'
|
188
191
|
else
|
189
|
-
last = xsheet.at_xpath("/
|
190
|
-
last ? last.attributes['r'].value.match(/([A-Z]
|
192
|
+
last = xsheet.at_xpath("/worksheet/sheetData/row[last()]/c[last()]")
|
193
|
+
last ? last.attributes['r'].value.match(/([A-Z]+[0-9]+)/).captures.first : 'A1'
|
191
194
|
end
|
192
195
|
end
|
193
196
|
|
197
|
+
# Returns dimensions (1-indexed)
|
198
|
+
def sheet_dimensions(xsheet)
|
199
|
+
column, row = *last_cell_label(xsheet).match(/([A-Z]+)([0-9]+)/).captures
|
200
|
+
[column_letter_to_number(column), row.to_i]
|
201
|
+
end
|
202
|
+
|
203
|
+
# column letters are read as base-26 digits where 'A' = 1 ... 'Z' = 26 (there is no zero digit), e.g.:
|
204
|
+
# 'A' = 1
|
205
|
+
# 'B' = 2
|
206
|
+
# 'Z' = 26
|
207
|
+
# 'AA' = 26 * 1 + 1
|
208
|
+
# 'AZ' = 26 * 1 + 26
|
209
|
+
# 'BA' = 26 * 2 + 1
|
210
|
+
# 'ZA' = 26 * 26 + 1
|
211
|
+
# 'ZZ' = 26 * 26 + 26
|
212
|
+
# 'AAA' = 26 * 26 * 1 + 26 * 1 + 1
|
213
|
+
# 'AAZ' = 26 * 26 * 1 + 26 * 1 + 26
|
214
|
+
# 'ABA' = 26 * 26 * 1 + 26 * 2 + 1
|
215
|
+
# 'BZA' = 26 * 26 * 2 + 26 * 26 + 1
|
216
|
+
def column_letter_to_number(column_letter)
|
217
|
+
pow = -1
|
218
|
+
column_letter.codepoints.reverse.inject(0) do |acc, charcode|
|
219
|
+
pow += 1
|
220
|
+
acc + 26**pow * (charcode - 64)
|
221
|
+
end
|
222
|
+
end
|
194
223
|
|
195
224
|
# Excel doesn't record types for some cells, only its display style, so
|
196
225
|
# we have to back out the type from that style.
|
@@ -208,7 +237,7 @@ module SimpleXlsxReader
|
|
208
237
|
# type.
|
209
238
|
def style_types
|
210
239
|
@style_types ||=
|
211
|
-
xml.styles.xpath('/
|
240
|
+
xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
|
212
241
|
style_type_by_num_fmt_id(xstyle.attributes['numFmtId'].value)}
|
213
242
|
end
|
214
243
|
|
@@ -229,7 +258,7 @@ module SimpleXlsxReader
|
|
229
258
|
# ex. {164 => :date_time}
|
230
259
|
def custom_style_types
|
231
260
|
@custom_style_types ||=
|
232
|
-
xml.styles.xpath('/
|
261
|
+
xml.styles.xpath('/styleSheet/numFmts/numFmt').
|
233
262
|
inject({}) do |acc, xstyle|
|
234
263
|
|
235
264
|
acc[xstyle.attributes['numFmtId'].value.to_i] =
|
@@ -321,11 +350,9 @@ module SimpleXlsxReader
|
|
321
350
|
|
322
351
|
if fraction_of_24 # there is a time associated
|
323
352
|
fraction_of_24 = "0.#{fraction_of_24}".to_f
|
324
|
-
|
325
|
-
hour = military.truncate
|
326
|
-
minute = ((military % 1) * 60).truncate
|
353
|
+
seconds = (fraction_of_24 * 86400).round
|
327
354
|
|
328
|
-
return Time.utc(date.year, date.month, date.day
|
355
|
+
return Time.utc(date.year, date.month, date.day) + seconds
|
329
356
|
else
|
330
357
|
return date
|
331
358
|
end
|
@@ -389,12 +416,12 @@ module SimpleXlsxReader
|
|
389
416
|
def shared_strings
|
390
417
|
@shared_strings ||= begin
|
391
418
|
if xml.shared_strings
|
392
|
-
xml.shared_strings.xpath('/
|
419
|
+
xml.shared_strings.xpath('/sst/si').map do |xsst|
|
393
420
|
# a shared string can be a single value...
|
394
|
-
sst = xsst.at_xpath('
|
421
|
+
sst = xsst.at_xpath('t/text()')
|
395
422
|
sst = sst.text if sst
|
396
423
|
# ... or a composite of separately styled words/characters
|
397
|
-
sst ||= xsst.xpath('
|
424
|
+
sst ||= xsst.xpath('r/t/text()').map(&:text).join
|
398
425
|
end
|
399
426
|
else
|
400
427
|
[]
|
data/simple_xlsx_reader.gemspec
CHANGED
@@ -15,7 +15,7 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.add_dependency 'nokogiri'
|
16
16
|
gem.add_dependency 'rubyzip'
|
17
17
|
|
18
|
-
gem.add_development_dependency 'minitest'
|
18
|
+
gem.add_development_dependency 'minitest', '>= 5.0'
|
19
19
|
gem.add_development_dependency 'pry'
|
20
20
|
|
21
21
|
gem.files = `git ls-files`.split($/)
|
@@ -0,0 +1,18 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
describe SimpleXlsxReader do
|
4
|
+
let(:datetimes_file) { File.join(File.dirname(__FILE__),
|
5
|
+
'datetimes.xlsx') }
|
6
|
+
|
7
|
+
let(:subject) { SimpleXlsxReader::Document.new(datetimes_file) }
|
8
|
+
|
9
|
+
it 'converts date_times with the correct precision' do
|
10
|
+
subject.to_hash.must_equal({
|
11
|
+
"Datetimes" =>
|
12
|
+
[[Time.parse("2013-08-19 18:29:59 UTC")],
|
13
|
+
[Time.parse("2013-08-19 18:30:00 UTC")],
|
14
|
+
[Time.parse("2013-08-19 18:30:01 UTC")]]
|
15
|
+
})
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
data/test/datetimes.xlsx
ADDED
Binary file
|
data/test/performance_test.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'test_helper'
|
2
2
|
require 'minitest/benchmark'
|
3
3
|
|
4
|
-
describe SimpleXlsxReader do
|
4
|
+
describe 'SimpleXlsxReader Benchmark' do
|
5
5
|
|
6
6
|
# n is 0-indexed for us, then converted to 1-indexed for excel
|
7
7
|
def build_row(n)
|
@@ -58,8 +58,8 @@ describe SimpleXlsxReader do
|
|
58
58
|
</sheetData>
|
59
59
|
</worksheet>
|
60
60
|
XML
|
61
|
-
)
|
62
|
-
base.at_xpath("/
|
61
|
+
).remove_namespaces!
|
62
|
+
base.at_xpath("/worksheet/sheetData").add_child(build_row(0))
|
63
63
|
|
64
64
|
@xml = SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
65
65
|
xml.sheets = [base]
|
@@ -76,7 +76,7 @@ describe SimpleXlsxReader do
|
|
76
76
|
</cellXfs>
|
77
77
|
</styleSheet>
|
78
78
|
XML
|
79
|
-
)
|
79
|
+
).remove_namespaces!
|
80
80
|
end
|
81
81
|
|
82
82
|
# Every new sheet has one more row
|
@@ -84,8 +84,8 @@ describe SimpleXlsxReader do
|
|
84
84
|
sheet = base.clone
|
85
85
|
|
86
86
|
range.times do |n|
|
87
|
-
sheet.xpath("/
|
88
|
-
add_next_sibling(build_row(n))
|
87
|
+
sheet.xpath("/worksheet/sheetData/row").last.
|
88
|
+
add_next_sibling(build_row(n+1))
|
89
89
|
end
|
90
90
|
|
91
91
|
@xml.sheets[range] = sheet
|
@@ -93,19 +93,19 @@ describe SimpleXlsxReader do
|
|
93
93
|
end
|
94
94
|
|
95
95
|
def self.bench_range
|
96
|
-
bench_exp(1,
|
96
|
+
bench_exp(1,10000)
|
97
97
|
end
|
98
98
|
|
99
99
|
bench_performance_linear 'parses sheets in linear time', 0.9999 do |n|
|
100
100
|
|
101
|
-
raise "not enough sample data; asked for #{n}, only have #{@xml.sheets.
|
101
|
+
raise "not enough sample data; asked for #{n}, only have #{@xml.sheets.size}"\
|
102
102
|
if @xml.sheets[n].nil?
|
103
103
|
|
104
104
|
sheet = SimpleXlsxReader::Document::Mapper.new(@xml).
|
105
105
|
parse_sheet('test', @xml.sheets[n])
|
106
106
|
|
107
|
-
raise "sheet didn't parse correctly; expected #{n + 1} rows, got #{sheet.rows.
|
108
|
-
if sheet.rows.
|
107
|
+
raise "sheet didn't parse correctly; expected #{n + 1} rows, got #{sheet.rows.size}"\
|
108
|
+
if sheet.rows.size != n + 1
|
109
109
|
end
|
110
110
|
|
111
111
|
end
|
@@ -44,12 +44,12 @@ describe SimpleXlsxReader do
|
|
44
44
|
end
|
45
45
|
|
46
46
|
it 'reads time styles' do
|
47
|
-
described_class.cast('41505.
|
47
|
+
described_class.cast('41505.77083', nil, :time).
|
48
48
|
must_equal Time.parse('2013-08-19 18:30 UTC')
|
49
49
|
end
|
50
50
|
|
51
51
|
it 'reads date_time styles' do
|
52
|
-
described_class.cast('41505.
|
52
|
+
described_class.cast('41505.77083', nil, :date_time).
|
53
53
|
must_equal Time.parse('2013-08-19 18:30 UTC')
|
54
54
|
end
|
55
55
|
|
@@ -59,12 +59,12 @@ describe SimpleXlsxReader do
|
|
59
59
|
end
|
60
60
|
|
61
61
|
it 'reads number types styled as times' do
|
62
|
-
described_class.cast('41505.
|
62
|
+
described_class.cast('41505.77083', 'n', :time).
|
63
63
|
must_equal Time.parse('2013-08-19 18:30 UTC')
|
64
64
|
end
|
65
65
|
|
66
66
|
it 'reads number types styled as date_times' do
|
67
|
-
described_class.cast('41505.
|
67
|
+
described_class.cast('41505.77083', 'n', :date_time).
|
68
68
|
must_equal Time.parse('2013-08-19 18:30 UTC')
|
69
69
|
end
|
70
70
|
end
|
@@ -73,7 +73,7 @@ describe SimpleXlsxReader do
|
|
73
73
|
let(:xml) do
|
74
74
|
SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
75
75
|
xml.shared_strings = Nokogiri::XML(File.read(
|
76
|
-
File.join(File.dirname(__FILE__), 'shared_strings.xml') ))
|
76
|
+
File.join(File.dirname(__FILE__), 'shared_strings.xml') )).remove_namespaces!
|
77
77
|
end
|
78
78
|
end
|
79
79
|
|
@@ -92,7 +92,7 @@ describe SimpleXlsxReader do
|
|
92
92
|
let(:xml) do
|
93
93
|
SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
94
94
|
xml.styles = Nokogiri::XML(File.read(
|
95
|
-
File.join(File.dirname(__FILE__), 'styles.xml') ))
|
95
|
+
File.join(File.dirname(__FILE__), 'styles.xml') )).remove_namespaces!
|
96
96
|
end
|
97
97
|
end
|
98
98
|
|
@@ -105,7 +105,7 @@ describe SimpleXlsxReader do
|
|
105
105
|
end
|
106
106
|
end
|
107
107
|
|
108
|
-
describe '#
|
108
|
+
describe '#last_cell_label' do
|
109
109
|
|
110
110
|
let(:generic_style) do
|
111
111
|
Nokogiri::XML(
|
@@ -116,7 +116,7 @@ describe SimpleXlsxReader do
|
|
116
116
|
</cellXfs>
|
117
117
|
</styleSheet>
|
118
118
|
XML
|
119
|
-
)
|
119
|
+
).remove_namespaces!
|
120
120
|
end
|
121
121
|
|
122
122
|
# Note, this is not a valid sheet, since the last cell is actually D1 but
|
@@ -141,7 +141,7 @@ describe SimpleXlsxReader do
|
|
141
141
|
</sheetData>
|
142
142
|
</worksheet>
|
143
143
|
XML
|
144
|
-
)
|
144
|
+
).remove_namespaces!
|
145
145
|
end
|
146
146
|
|
147
147
|
let(:empty_sheet) do
|
@@ -153,7 +153,7 @@ describe SimpleXlsxReader do
|
|
153
153
|
</sheetData>
|
154
154
|
</worksheet>
|
155
155
|
XML
|
156
|
-
)
|
156
|
+
).remove_namespaces!
|
157
157
|
end
|
158
158
|
|
159
159
|
let(:xml) do
|
@@ -166,21 +166,45 @@ describe SimpleXlsxReader do
|
|
166
166
|
subject { described_class.new(xml) }
|
167
167
|
|
168
168
|
it 'uses /worksheet/dimension if available' do
|
169
|
-
subject.
|
169
|
+
subject.last_cell_label(sheet).must_equal 'C1'
|
170
170
|
end
|
171
171
|
|
172
172
|
it 'uses the last header cell if /worksheet/dimension is missing' do
|
173
|
-
sheet.xpath('/
|
174
|
-
subject.
|
173
|
+
sheet.xpath('/worksheet/dimension').remove
|
174
|
+
subject.last_cell_label(sheet).must_equal 'D1'
|
175
175
|
end
|
176
176
|
|
177
|
-
it 'returns "
|
178
|
-
subject.
|
177
|
+
it 'returns "A1" if the dimension is just one cell' do
|
178
|
+
subject.last_cell_label(empty_sheet).must_equal 'A1'
|
179
179
|
end
|
180
180
|
|
181
|
-
it 'returns "
|
182
|
-
sheet.at_xpath('/
|
183
|
-
subject.
|
181
|
+
it 'returns "A1" if the sheet is just one cell, but /worksheet/dimension is missing' do
|
182
|
+
sheet.at_xpath('/worksheet/dimension').remove
|
183
|
+
subject.last_cell_label(empty_sheet).must_equal 'A1'
|
184
|
+
end
|
185
|
+
end
|
186
|
+
|
187
|
+
describe '#column_letter_to_number' do
|
188
|
+
let(:subject) { described_class.new }
|
189
|
+
|
190
|
+
[ ['A', 1 ],
|
191
|
+
['B', 2 ],
|
192
|
+
['Z', 26 ],
|
193
|
+
['AA', 27 ],
|
194
|
+
['AB', 28 ],
|
195
|
+
['AZ', 52 ],
|
196
|
+
['BA', 53 ],
|
197
|
+
['BZ', 78 ],
|
198
|
+
['ZZ', 702 ],
|
199
|
+
['AAA', 703 ],
|
200
|
+
['AAZ', 728 ],
|
201
|
+
['ABA', 729 ],
|
202
|
+
['ABZ', 754 ],
|
203
|
+
['AZZ', 1378 ],
|
204
|
+
['ZZZ', 18278] ].each do |(letter, number)|
|
205
|
+
it "converts #{letter} to #{number}" do
|
206
|
+
subject.column_letter_to_number(letter).must_equal number
|
207
|
+
end
|
184
208
|
end
|
185
209
|
end
|
186
210
|
|
@@ -204,7 +228,7 @@ describe SimpleXlsxReader do
|
|
204
228
|
</sheetData>
|
205
229
|
</worksheet>
|
206
230
|
XML
|
207
|
-
)]
|
231
|
+
).remove_namespaces!]
|
208
232
|
|
209
233
|
# s='0' above refers to the value of numFmtId at cellXfs index 0
|
210
234
|
xml.styles = Nokogiri::XML(
|
@@ -215,7 +239,7 @@ describe SimpleXlsxReader do
|
|
215
239
|
</cellXfs>
|
216
240
|
</styleSheet>
|
217
241
|
XML
|
218
|
-
)
|
242
|
+
).remove_namespaces!
|
219
243
|
end
|
220
244
|
end
|
221
245
|
|
@@ -264,7 +288,7 @@ describe SimpleXlsxReader do
|
|
264
288
|
</sheetData>
|
265
289
|
</worksheet>
|
266
290
|
XML
|
267
|
-
)]
|
291
|
+
).remove_namespaces!]
|
268
292
|
|
269
293
|
# s='0' above refers to the value of numFmtId at cellXfs index 0,
|
270
294
|
# which is in this case 'General' type
|
@@ -278,7 +302,7 @@ describe SimpleXlsxReader do
|
|
278
302
|
</cellXfs>
|
279
303
|
</styleSheet>
|
280
304
|
XML
|
281
|
-
)
|
305
|
+
).remove_namespaces!
|
282
306
|
end
|
283
307
|
end
|
284
308
|
|
@@ -318,5 +342,68 @@ describe SimpleXlsxReader do
|
|
318
342
|
@row[6].must_equal 'Cell G1'
|
319
343
|
end
|
320
344
|
end
|
345
|
+
|
346
|
+
describe 'parsing documents with blank rows' do
|
347
|
+
let(:xml) do
|
348
|
+
SimpleXlsxReader::Document::Xml.new.tap do |xml|
|
349
|
+
xml.sheets = [Nokogiri::XML(
|
350
|
+
<<-XML
|
351
|
+
<worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
352
|
+
<dimension ref="A1:D7" />
|
353
|
+
<sheetData>
|
354
|
+
<row r="2" spans="1:1">
|
355
|
+
<c r="A2" s="0">
|
356
|
+
<v>0</v>
|
357
|
+
</c>
|
358
|
+
</row>
|
359
|
+
<row r="4" spans="1:1">
|
360
|
+
<c r="B4" s="0">
|
361
|
+
<v>1</v>
|
362
|
+
</c>
|
363
|
+
</row>
|
364
|
+
<row r="5" spans="1:1">
|
365
|
+
<c r="C5" s="0">
|
366
|
+
<v>2</v>
|
367
|
+
</c>
|
368
|
+
</row>
|
369
|
+
<row r="7" spans="1:1">
|
370
|
+
<c r="D7" s="0">
|
371
|
+
<v>3</v>
|
372
|
+
</c>
|
373
|
+
</row>
|
374
|
+
</sheetData>
|
375
|
+
</worksheet>
|
376
|
+
XML
|
377
|
+
).remove_namespaces!]
|
378
|
+
|
379
|
+
xml.styles = Nokogiri::XML(
|
380
|
+
<<-XML
|
381
|
+
<styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
382
|
+
<cellXfs count="1">
|
383
|
+
<xf numFmtId="0" />
|
384
|
+
</cellXfs>
|
385
|
+
</styleSheet>
|
386
|
+
XML
|
387
|
+
).remove_namespaces!
|
388
|
+
end
|
389
|
+
end
|
390
|
+
|
391
|
+
before do
|
392
|
+
@rows = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows
|
393
|
+
end
|
394
|
+
|
395
|
+
it "reads row data despite gaps in row numbering" do
|
396
|
+
@rows.must_equal [
|
397
|
+
[nil,nil,nil,nil],
|
398
|
+
["0",nil,nil,nil],
|
399
|
+
[nil,nil,nil,nil],
|
400
|
+
[nil,"1",nil,nil],
|
401
|
+
[nil,nil,"2",nil],
|
402
|
+
[nil,nil,nil,nil],
|
403
|
+
[nil,nil,nil,"3"]
|
404
|
+
]
|
405
|
+
end
|
406
|
+
end
|
407
|
+
|
321
408
|
end
|
322
409
|
end
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,69 +1,69 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_xlsx_reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0.pre
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Woody Peterson
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-06-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '0'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- -
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rubyzip
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: minitest
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- -
|
45
|
+
- - ">="
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
47
|
+
version: '5.0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- -
|
52
|
+
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
54
|
+
version: '5.0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
56
|
name: pry
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: '0'
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
|
-
- -
|
66
|
+
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
description: Read xlsx data the Ruby way
|
@@ -73,7 +73,7 @@ executables: []
|
|
73
73
|
extensions: []
|
74
74
|
extra_rdoc_files: []
|
75
75
|
files:
|
76
|
-
- .gitignore
|
76
|
+
- ".gitignore"
|
77
77
|
- CHANGELOG.md
|
78
78
|
- Gemfile
|
79
79
|
- LICENSE.txt
|
@@ -82,6 +82,8 @@ files:
|
|
82
82
|
- lib/simple_xlsx_reader.rb
|
83
83
|
- lib/simple_xlsx_reader/version.rb
|
84
84
|
- simple_xlsx_reader.gemspec
|
85
|
+
- test/datetime_test.rb
|
86
|
+
- test/datetimes.xlsx
|
85
87
|
- test/performance_test.rb
|
86
88
|
- test/sesame_street_blog.xlsx
|
87
89
|
- test/shared_strings.xml
|
@@ -97,21 +99,23 @@ require_paths:
|
|
97
99
|
- lib
|
98
100
|
required_ruby_version: !ruby/object:Gem::Requirement
|
99
101
|
requirements:
|
100
|
-
- -
|
102
|
+
- - ">="
|
101
103
|
- !ruby/object:Gem::Version
|
102
104
|
version: '0'
|
103
105
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
104
106
|
requirements:
|
105
|
-
- -
|
107
|
+
- - ">"
|
106
108
|
- !ruby/object:Gem::Version
|
107
|
-
version:
|
109
|
+
version: 1.3.1
|
108
110
|
requirements: []
|
109
111
|
rubyforge_project:
|
110
|
-
rubygems_version: 2.0
|
112
|
+
rubygems_version: 2.2.0
|
111
113
|
signing_key:
|
112
114
|
specification_version: 4
|
113
115
|
summary: Read xlsx data the Ruby way
|
114
116
|
test_files:
|
117
|
+
- test/datetime_test.rb
|
118
|
+
- test/datetimes.xlsx
|
115
119
|
- test/performance_test.rb
|
116
120
|
- test/sesame_street_blog.xlsx
|
117
121
|
- test/shared_strings.xml
|