simple_xlsx_reader 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in simple_xlsx_reader.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Woody Peterson
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,100 @@
1
+ # SimpleXlsxReader
2
+
3
+ An xlsx reader for Ruby that parses xlsx cell values into plain ruby
4
+ primitives and dates/times.
5
+
6
+ This is *not* a rewrite of Excel in Ruby. Font styles, for
7
+ example, are parsed to determine whether a cell is a number or a date,
8
+ then forgotten. We just want to get the data, and get out!
9
+
10
+ ## Usage
11
+
12
+ ### Summary:
13
+
14
+ doc = SimpleXlsxReader.open('/path/to/workbook.xlsx')
15
+ doc.sheets # => [<#SXR::Sheet>, ...]
16
+ doc.sheets.first.name # 'Sheet1'
17
+ doc.sheets.first.rows # [['Header 1', 'Header 2', ...]
18
+ ['foo', 2, ...]]
19
+
20
+ That's it!
21
+
22
+ ### Load Errors
23
+
24
+ By default, cell load errors (e.g. if a date cell contains the string
25
+ 'hello') result in a SimpleXlsxReader::CellLoadError.
26
+
27
+ If you would like to provide better error feedback to your users, you
28
+ can set `SimpleXlsxReader.configuration.catch_cell_load_errors =
29
+ true`, and load errors will instead be inserted into Sheet#load_errors keyed
30
+ by [rownum, colnum].
31
+
32
+ ### More
33
+
34
+ Here's the totality of the public api, in code:
35
+
36
+ module SimpleXlsxReader
37
+ def self.open(file_path)
38
+ Document.new(file_path).tap(&:sheets)
39
+ end
40
+
41
+ class Document
42
+ attr_reader :file_path
43
+
44
+ def initialize(file_path)
45
+ @file_path = file_path
46
+ end
47
+
48
+ def sheets
49
+ @sheets ||= Mapper.new(xml).load_sheets
50
+ end
51
+
52
+ def to_hash
53
+ sheets.inject({}) {|acc, sheet| acc[sheet.name] = sheet.rows; acc}
54
+ end
55
+
56
+ def xml
57
+ Xml.load(file_path)
58
+ end
59
+
60
+ class Sheet < Struct.new(:name, :rows)
61
+ def headers
62
+ rows[0]
63
+ end
64
+
65
+ def data
66
+ rows[1..-1]
67
+ end
68
+
69
+ # Load errors will be a hash of the form:
70
+ # {
71
+ # [rownum, colnum] => '[error]'
72
+ # }
73
+ def load_errors
74
+ @load_errors ||= {}
75
+ end
76
+ end
77
+ end
78
+ end
79
+
80
+ ## Installation
81
+
82
+ Add this line to your application's Gemfile:
83
+
84
+ gem 'simple_xlsx_reader'
85
+
86
+ And then execute:
87
+
88
+ $ bundle
89
+
90
+ Or install it yourself as:
91
+
92
+ $ gem install simple_xlsx_reader
93
+
94
+ ## Contributing
95
+
96
+ 1. Fork it
97
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
98
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
99
+ 4. Push to the branch (`git push origin my-new-feature`)
100
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rake/testtask'
4
+ Rake::TestTask.new do |t|
5
+ t.pattern = "test/**/*_test.rb"
6
+ end
7
+
8
+ task :default => [:test]
@@ -0,0 +1,354 @@
1
+ require "simple_xlsx_reader/version"
2
+ require 'nokogiri'
3
+ require 'zip/zip'
4
+ require 'zip/zipfilesystem'
5
+ require 'date'
6
+
7
+ module SimpleXlsxReader
8
+ class CellLoadError < StandardError; end
9
+
10
# Settings object shared by every caller; built on first access and then
# reused.
#
# Its single field, +catch_cell_load_errors+, starts out +false+.
def self.configuration
  return @configuration if @configuration

  settings = Struct.new(:catch_cell_load_errors).new
  settings.catch_cell_load_errors = false
  @configuration = settings
end
15
+
16
# Builds a Document for +file_path+ and asks it for its sheets straight
# away, so the document handed back has already been loaded.
def self.open(file_path)
  document = Document.new(file_path)
  document.sheets
  document
end
19
+
20
+ class Document
21
+ attr_reader :file_path
22
+
23
# Only remembers +file_path+; nothing is opened or parsed here.
def initialize(file_path)
  @file_path = file_path
end
26
+
27
# The sheets produced by Mapper#load_sheets for this document's xml.
# The result is computed on the first call and cached afterwards.
def sheets
  return @sheets if @sheets

  @sheets = Mapper.new(xml).load_sheets
end
30
+
31
# Flattens the document into {sheet name => rows}.
def to_hash
  sheets.each_with_object({}) do |sheet, rows_by_name|
    rows_by_name[sheet.name] = sheet.rows
  end
end
34
+
35
# Loads the source xml for +file_path+ via Xml.load. Not cached: each call
# loads again.
def xml
  Xml.load(file_path)
end
38
+
39
# One worksheet: its +name+ and its +rows+ (an array of row arrays).
class Sheet < Struct.new(:name, :rows)
  # The first row, treated as column headers (nil when there are no rows).
  def headers
    rows[0]
  end

  # Every row after the header row.
  #
  # Always an array: a sheet with no rows at all yields [] rather than the
  # nil that slicing with 1..-1 gives for an empty array.
  def data
    rows.drop(1)
  end

  # Load errors will be a hash of the form:
  # {
  #   [rownum, colnum] => '[error]'
  # }
  def load_errors
    @load_errors ||= {}
  end
end
56
+
57
##
# For internal use; holds the parsed pieces of a workbook as nokogiri
# documents.
class Xml
  attr_accessor :workbook, :shared_strings, :sheets, :styles

  # Opens the zip archive at +file_path+ and returns a populated Xml.
  #
  # The workbook and styles entries are always read. The shared strings
  # entry is read only when present (otherwise +shared_strings+ stays nil).
  # Worksheets are collected from sheet1.xml upwards, stopping at the first
  # number with no entry.
  def self.load(file_path)
    loaded = new

    Zip::ZipFile.open(file_path) do |zip|
      loaded.workbook = Nokogiri::XML(zip.read('xl/workbook.xml'))
      loaded.styles = Nokogiri::XML(zip.read('xl/styles.xml'))

      # optional feature used by excel, but not often used by xlsx
      # generation libraries
      if zip.file.file?('xl/sharedStrings.xml')
        loaded.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml'))
      end

      loaded.sheets = []
      (1..Float::INFINITY).each do |number|
        entry = "xl/worksheets/sheet#{number}.xml"
        break unless zip.file.file?(entry)

        loaded.sheets << Nokogiri::XML(zip.read(entry))
      end
    end

    loaded
  end
end
87
+
88
##
# For internal use; translates source xml to Sheet objects.
class Mapper < Struct.new(:xml)
  # Lowest numFmtId treated as workbook-defined (custom). Ids below this are
  # looked up in NumFmtMap; ids from here up are looked up in the workbook's
  # own numFmts list.
  FIRST_CUSTOM_NUM_FMT_ID = 164

  SECONDS_PER_DAY = 86_400

  # Builds one Sheet per entry in the workbook's table of contents.
  def load_sheets
    sheet_toc.map do |sheet_name, sheet_number|
      parse_sheet(sheet_name, xml.sheets[sheet_number])
    end
  end

  # Table of contents for the sheets, ex. {'Authors' => 0, ...}
  #
  # NOTE(review): this uses sheetId - 1 as the index into xml.sheets, i.e. it
  # assumes sheetId N lives in sheetN.xml - confirm for workbooks whose sheets
  # were reordered or deleted.
  def sheet_toc
    xml.workbook.xpath('/xmlns:workbook/xmlns:sheets/xmlns:sheet').
      each_with_object({}) do |xsheet, toc|
        # keep things 0-indexed
        toc[xsheet.attributes['name'].value] =
          xsheet.attributes['sheetId'].value.to_i - 1
      end
  end

  # Converts one worksheet document into a Sheet named +sheet_name+ whose
  # rows hold the cast cell values.
  #
  # Row and column numbers are 0-based positions among the row and cell
  # elements actually present in the xml.
  #
  # Raises CellLoadError for a cell that cannot be cast, unless
  # SimpleXlsxReader.configuration.catch_cell_load_errors is set, in which
  # case the error is recorded in Sheet#load_errors instead.
  def parse_sheet(sheet_name, xsheet)
    sheet = Sheet.new(sheet_name)

    xrows = xsheet.xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row")
    sheet.rows = xrows.each_with_index.map do |xrow, rownum|
      xrow.xpath('xmlns:c').each_with_index.map do |xcell, colnum|
        load_cell(sheet, xcell, rownum, colnum)
      end
    end

    sheet
  end

  # Excel doesn't record types for some cells, only its display style, so
  # we have to back out the type from that style.
  #
  # Some of these styles can be determined from a known set (see NumFmtMap),
  # while others are 'custom' and we have to make a best guess.
  #
  # This is the array of types corresponding to the styles a spreadsheet
  # uses, and includes both the known style types and the custom styles.
  #
  # Note that the xml sheet cells that use this don't reference the
  # numFmtId, but instead the array index of a style in the stored list of
  # only the styles used in the spreadsheet (which can be either known or
  # custom). Hence this style types array, rather than a map of numFmtId to
  # type.
  def style_types
    @style_types ||=
      xml.styles.xpath('/xmlns:styleSheet/xmlns:cellXfs/xmlns:xf').map do |xstyle|
        style_type_by_num_fmt_id(xstyle.attributes['numFmtId'].value)
      end
  end

  # Finds the type we think a style is; For example, fmtId 14 is a date
  # style, so this would return :date
  #
  # Returns nil for a nil id or an id that is in neither table.
  def style_type_by_num_fmt_id(id)
    return nil if id.nil?

    id = id.to_i
    if id >= FIRST_CUSTOM_NUM_FMT_ID # custom style, arg!
      custom_style_types[id]
    else # we should know this one
      NumFmtMap[id]
    end
  end

  # Map of the numFmtIds listed in the workbook's numFmts (custom styles) to
  # our best guess at the type, ex. {165 => :date_time}
  def custom_style_types
    @custom_style_types ||=
      xml.styles.xpath('/xmlns:styleSheet/xmlns:numFmts/xmlns:numFmt').
        each_with_object({}) do |xstyle, types|
          types[xstyle.attributes['numFmtId'].value.to_i] =
            determine_custom_style_type(xstyle.attributes['formatCode'].value)
        end
  end

  # This is the least deterministic part of reading xlsx files. Due to
  # custom styles, you can't know for sure when a date is a date other than
  # looking at its format and guessing. It's not impossible to guess right,
  # though.
  #
  # http://stackoverflow.com/questions/4948998/determining-if-an-xlsx-cell-is-date-formatted-for-excel-2007-spreadsheets
  #
  # Returns :float, :date_time or :unsupported.
  def determine_custom_style_type(string)
    # Prefix match: comparing the single character string[0] against the
    # two-character ' 0' could never succeed.
    return :float if string.start_with?('_', ' 0')

    # Looks for one of ymdhis outside of meta-stuff like [Red]
    return :date_time if string =~ /(^|\])[^\[]*[ymdhis]/i

    :unsupported
  end

  ##
  # The heart of typecasting. The ruby type is determined either explicitly
  # from the cell xml or implicitly from the cell style, and this
  # method expects that work to have been done already. This, then,
  # takes the type we determined it to be and casts the cell value
  # to that type.
  #
  # types:
  # - s: shared string (see #shared_strings)
  # - n: number (cast to a float)
  # - b: boolean
  # - str: string
  # - inlineStr: string
  # - ruby symbol: for when type has been determined by style
  #
  # options:
  # - shared_strings: needed for 's' (shared string) type
  #
  # Returns nil for a nil or empty value, and the value untouched for any
  # type it does not recognise. Date/time values that are not numeric raise
  # ArgumentError (from Integer).
  def self.cast(value, type, options = {})
    return nil if value.nil? || value.empty?

    case type

    ##
    # There are few built-in types
    ##

    when 's' # shared string
      options[:shared_strings][value.to_i]
    when 'n', :float # number
      value.to_f
    when 'b'
      value.to_i == 1
    when 'str', 'inlineStr', :string, :unsupported
      value

    ##
    # Type can also be determined by a style,
    # detected earlier and cast here by its standardized symbol
    ##

    when :fixnum
      value.to_i
    when :percentage
      # NOTE(review): confirm the stored value is really a whole-number
      # percentage before relying on this division.
      value.to_f / 100
    # the trickiest. note that all these formats can vary on
    # whether they actually contain a date, time, or datetime.
    when :date, :time, :date_time
      cast_date_or_time(value)
    when :bignum
      # Kernel#BigDecimal rather than BigDecimal.new, which current
      # bigdecimal releases no longer provide.
      defined?(BigDecimal) ? BigDecimal(value) : value.to_f

    ##
    # Beats me
    ##

    else
      value
    end
  end

  # Turns a serial "days[.fraction-of-day]" string into a Date (no
  # fraction) or a UTC Time (with fraction).
  #
  # http://stackoverflow.com/questions/10559767/how-to-convert-ms-excel-date-from-float-to-date-format-in-ruby
  def self.cast_date_or_time(value)
    days_since_1900, fraction_of_24 = value.split('.')

    # Base 10 is forced so a leading zero is not read as octal.
    date = Date.new(1899, 12, 30) + Integer(days_since_1900, 10)
    return date unless fraction_of_24 # no time associated

    # Round to the nearest second: truncating the float product can turn a
    # stored 5:00 into 4:59, and would discard seconds altogether.
    seconds = ("0.#{fraction_of_24}".to_f * SECONDS_PER_DAY).round
    Time.utc(date.year, date.month, date.day) + seconds
  end
  private_class_method :cast_date_or_time

  # Map of non-custom numFmtId to casting symbol
  NumFmtMap = {
    0  => :string,      # General
    1  => :fixnum,      # 0
    2  => :float,       # 0.00
    3  => :fixnum,      # #,##0
    4  => :float,       # #,##0.00
    5  => :unsupported, # $#,##0_);($#,##0)
    6  => :unsupported, # $#,##0_);[Red]($#,##0)
    7  => :unsupported, # $#,##0.00_);($#,##0.00)
    8  => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
    9  => :percentage,  # 0%
    10 => :percentage,  # 0.00%
    11 => :bignum,      # 0.00E+00
    12 => :unsupported, # # ?/?
    13 => :unsupported, # # ??/??
    14 => :date,        # mm-dd-yy
    15 => :date,        # d-mmm-yy
    16 => :date,        # d-mmm
    17 => :date,        # mmm-yy
    18 => :time,        # h:mm AM/PM
    19 => :time,        # h:mm:ss AM/PM
    20 => :time,        # h:mm
    21 => :time,        # h:mm:ss
    22 => :date_time,   # m/d/yy h:mm
    37 => :unsupported, # #,##0 ;(#,##0)
    38 => :unsupported, # #,##0 ;[Red](#,##0)
    39 => :unsupported, # #,##0.00;(#,##0.00)
    40 => :unsupported, # #,##0.00;[Red](#,##0.00)
    45 => :time,        # mm:ss
    46 => :time,        # [h]:mm:ss
    47 => :time,        # mmss.0
    48 => :bignum,      # ##0.0E+0
    49 => :unsupported  # @
  }.freeze

  # For performance reasons, excel uses an optional SpreadsheetML feature
  # that puts all strings in a separate xml file, and then references
  # them by their index in that file.
  #
  # http://msdn.microsoft.com/en-us/library/office/gg278314.aspx
  #
  # Returns an array of strings ([] when the workbook has no shared strings).
  def shared_strings
    @shared_strings ||=
      if xml.shared_strings
        xml.shared_strings.xpath('/xmlns:sst/xmlns:si').map do |xsi|
          # a shared string can be a single value...
          plain = xsi.xpath('xmlns:t/text()').first
          if plain
            plain.text
          else
            # ... or a composite of separately styled words/characters
            xsi.xpath('xmlns:r/xmlns:t/text()').map(&:text).join
          end
        end
      else
        []
      end
  end

  private

  # Casts a single cell, applying the configured error policy (raise a
  # CellLoadError, or record the message on +sheet+ and keep the raw text).
  def load_cell(sheet, xcell, rownum, colnum)
    type = xcell.attributes['t'] &&
           xcell.attributes['t'].value
    # If not the above, attempt to determine from a custom style
    type ||= xcell.attributes['s'] &&
             style_types[xcell.attributes['s'].value.to_i]

    begin
      self.class.cast(xcell.text, type, shared_strings: shared_strings)
    rescue => e
      if SimpleXlsxReader.configuration.catch_cell_load_errors
        sheet.load_errors[[rownum, colnum]] = e.message

        xcell.text
      else
        error = CellLoadError.new(
          "Row #{rownum}, Col #{colnum}: #{e.message}")
        error.set_backtrace(e.backtrace)
        raise error
      end
    end
  end
end
352
+
353
+ end
354
+ end
@@ -0,0 +1,3 @@
1
module SimpleXlsxReader
  # Gem release number; the gemspec reads this when building the package.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'simple_xlsx_reader/version'
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = "simple_xlsx_reader"
8
+ gem.version = SimpleXlsxReader::VERSION
9
+ gem.authors = ["Woody Peterson"]
10
+ gem.email = ["woody@sigby.com"]
11
+ gem.description = %q{Read xlsx data the Ruby way}
12
+ gem.summary = %q{Read xlsx data the Ruby way}
13
+ gem.homepage = ""
14
+
15
+ gem.add_dependency 'nokogiri'
16
+ gem.add_dependency 'rubyzip'
17
+
18
+ gem.add_development_dependency 'pry'
19
+
20
+ gem.files = `git ls-files`.split($/)
21
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
22
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
23
+ gem.require_paths = ["lib"]
24
+ end
Binary file
@@ -0,0 +1,80 @@
1
+ <sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="6" uniqueCount="5">
2
+ <si>
3
+ <t>Cell A1</t>
4
+ </si>
5
+ <si>
6
+ <t>Cell B1</t>
7
+ </si>
8
+ <si>
9
+ <t>My Cell</t>
10
+ </si>
11
+ <si>
12
+ <r>
13
+ <rPr>
14
+ <sz val="11"/>
15
+ <color rgb="FFFF0000"/>
16
+ <rFont val="Calibri"/>
17
+ <family val="2"/>
18
+ <scheme val="minor"/>
19
+ </rPr>
20
+ <t>Cell</t>
21
+ </r>
22
+ <r>
23
+ <rPr>
24
+ <sz val="11"/>
25
+ <color theme="1"/>
26
+ <rFont val="Calibri"/>
27
+ <family val="2"/>
28
+ <scheme val="minor"/>
29
+ </rPr>
30
+ <t xml:space="preserve"> </t>
31
+ </r>
32
+ <r>
33
+ <rPr>
34
+ <b/>
35
+ <sz val="11"/>
36
+ <color theme="1"/>
37
+ <rFont val="Calibri"/>
38
+ <family val="2"/>
39
+ <scheme val="minor"/>
40
+ </rPr>
41
+ <t>A2</t>
42
+ </r>
43
+ </si>
44
+ <si>
45
+ <r>
46
+ <rPr>
47
+ <sz val="11"/>
48
+ <color rgb="FF00B0F0"/>
49
+ <rFont val="Calibri"/>
50
+ <family val="2"/>
51
+ <scheme val="minor"/>
52
+ </rPr>
53
+ <t>Cell</t>
54
+ </r>
55
+ <r>
56
+ <rPr>
57
+ <sz val="11"/>
58
+ <color theme="1"/>
59
+ <rFont val="Calibri"/>
60
+ <family val="2"/>
61
+ <scheme val="minor"/>
62
+ </rPr>
63
+ <t xml:space="preserve"> </t>
64
+ </r>
65
+ <r>
66
+ <rPr>
67
+ <i/>
68
+ <sz val="11"/>
69
+ <color theme="1"/>
70
+ <rFont val="Calibri"/>
71
+ <family val="2"/>
72
+ <scheme val="minor"/>
73
+ </rPr>
74
+ <t>B2</t>
75
+ </r>
76
+ </si>
77
+ <si>
78
+ <t>Cell Fmt</t>
79
+ </si>
80
+ </sst>
@@ -0,0 +1,108 @@
1
+ require_relative 'test_helper'
2
+ require 'time'
3
+
4
+ describe SimpleXlsxReader do
5
+ let(:sesame_street_blog_file) { File.join(File.dirname(__FILE__),
6
+ 'sesame_street_blog.xlsx') }
7
+
8
+ let(:subject) { SimpleXlsxReader::Document.new(sesame_street_blog_file) }
9
+
10
+ describe '#to_hash' do
11
+ it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
12
+ subject.to_hash.must_equal({
13
+ "Authors"=>
14
+ [["Name", "Occupation"],
15
+ ["Big Bird", "Teacher"]],
16
+
17
+ "Posts"=>
18
+ [["Author Name", "Title", "Body", "Created At", "Comment Count"],
19
+ ["Big Bird", "The Number 1", "The Greatest", Time.parse("2002-01-01 11:00:00 UTC"), 1],
20
+ ["Big Bird", "The Number 2", "Second Best", Time.parse("2002-01-02 14:00:00 UTC"), 2]]
21
+ })
22
+ end
23
+ end
24
+
25
+ describe SimpleXlsxReader::Document::Mapper do
26
+ let(:described_class) { SimpleXlsxReader::Document::Mapper }
27
+
28
+ describe '::cast' do
29
+ it 'reads type s as a shared string' do
30
+ described_class.cast('1', 's', shared_strings: ['a', 'b', 'c']).
31
+ must_equal 'b'
32
+ end
33
+
34
+ it 'reads type inlineStr as a string' do
35
+ xml = Nokogiri::XML(%( <c t="inlineStr"><is><t>the value</t></is></c> ))
36
+ described_class.cast(xml.text, 'inlineStr').must_equal 'the value'
37
+ end
38
+ end
39
+
40
+ describe '#shared_strings' do
41
+ let(:xml) do
42
+ SimpleXlsxReader::Document::Xml.new.tap do |xml|
43
+ xml.shared_strings = Nokogiri::XML(File.read(
44
+ File.join(File.dirname(__FILE__), 'shared_strings.xml') ))
45
+ end
46
+ end
47
+
48
+ subject { described_class.new(xml) }
49
+
50
+ it 'parses strings formatted at the cell level' do
51
+ subject.shared_strings[0..2].must_equal ['Cell A1', 'Cell B1', 'My Cell']
52
+ end
53
+
54
+ it 'parses strings formatted at the character level' do
55
+ subject.shared_strings[3..5].must_equal ['Cell A2', 'Cell B2', 'Cell Fmt']
56
+ end
57
+ end
58
+
59
+ describe "parse errors" do
60
+ after do
61
+ SimpleXlsxReader.configuration.catch_cell_load_errors = false
62
+ end
63
+
64
+ let(:xml) do
65
+ SimpleXlsxReader::Document::Xml.new.tap do |xml|
66
+ xml.sheets = [Nokogiri::XML(
67
+ <<-XML
68
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
69
+ <sheetData>
70
+ <row>
71
+ <c s='0'>
72
+ <v>14 is a date style; this is not a date</v>
73
+ </c>
74
+ </row>
75
+ </sheetData>
76
+ </worksheet>
77
+ XML
78
+ )]
79
+
80
+ # s='0' above refers to the value of numFmtId at cellXfs index 0
81
+ xml.styles = Nokogiri::XML(
82
+ <<-XML
83
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
84
+ <cellXfs count="1">
85
+ <xf numFmtId="14" />
86
+ </cellXfs>
87
+ </styleSheet>
88
+ XML
89
+ )
90
+ end
91
+ end
92
+
93
# Default mode: a cell that cannot be cast aborts parsing with a
# CellLoadError.
it 'raises a CellLoadError unless configuration.catch_cell_load_errors' do
  SimpleXlsxReader.configuration.catch_cell_load_errors = false

  lambda { described_class.new(xml).parse_sheet('test', xml.sheets.first) }.
    must_raise(SimpleXlsxReader::CellLoadError)
end
99
+
100
# Catch mode: the failure is stored on the sheet under [rownum, colnum]
# instead of being raised.
it 'records a load error if configuration.catch_cell_load_errors' do
  SimpleXlsxReader.configuration.catch_cell_load_errors = true

  sheet = described_class.new(xml).parse_sheet('test', xml.sheets.first)
  sheet.load_errors[[0,0]].must_include 'invalid value for Integer'
end
106
+ end
107
+ end
108
+ end
@@ -0,0 +1,7 @@
1
+ gem 'minitest'
2
+ require 'minitest/spec'
3
+ require 'minitest/autorun'
4
+ require 'pry'
5
+
6
+ $:.unshift File.expand_path("lib")
7
+ require 'simple_xlsx_reader'
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: simple_xlsx_reader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Woody Peterson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rubyzip
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: pry
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: Read xlsx data the Ruby way
63
+ email:
64
+ - woody@sigby.com
65
+ executables: []
66
+ extensions: []
67
+ extra_rdoc_files: []
68
+ files:
69
+ - .gitignore
70
+ - Gemfile
71
+ - LICENSE.txt
72
+ - README.md
73
+ - Rakefile
74
+ - lib/simple_xlsx_reader.rb
75
+ - lib/simple_xlsx_reader/version.rb
76
+ - simple_xlsx_reader.gemspec
77
+ - test/sesame_street_blog.xlsx
78
+ - test/shared_strings.xml
79
+ - test/simple_xlsx_reader_test.rb
80
+ - test/test_helper.rb
81
+ homepage: ''
82
+ licenses: []
83
+ post_install_message:
84
+ rdoc_options: []
85
+ require_paths:
86
+ - lib
87
+ required_ruby_version: !ruby/object:Gem::Requirement
88
+ none: false
89
+ requirements:
90
+ - - ! '>='
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ required_rubygems_version: !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ requirements: []
100
+ rubyforge_project:
101
+ rubygems_version: 1.8.24
102
+ signing_key:
103
+ specification_version: 3
104
+ summary: Read xlsx data the Ruby way
105
+ test_files:
106
+ - test/sesame_street_blog.xlsx
107
+ - test/shared_strings.xml
108
+ - test/simple_xlsx_reader_test.rb
109
+ - test/test_helper.rb
110
+ has_rdoc: