simple_xlsx_reader 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in simple_xlsx_reader.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Woody Peterson
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,100 @@
1
+ # SimpleXlsxReader
2
+
3
+ An xlsx reader for Ruby that parses xlsx cell values into plain ruby
4
+ primitives and dates/times.
5
+
6
+ This is *not* a rewrite of excel in Ruby. Font styles, for
7
+ example, are parsed to determine whether a cell is a number or a date,
8
+ then forgotten. We just want to get the data, and get out!
9
+
10
+ ## Usage
11
+
12
+ ### Summary:
13
+
14
+ doc = SimpleXlsxReader.open('/path/to/workbook.xlsx')
15
+ doc.sheets # => [#<SXR::Sheet>, ...]
16
+ doc.sheets.first.name # 'Sheet1'
17
+ doc.sheets.first.rows # [['Header 1', 'Header 2', ...]
18
+ ['foo', 2, ...]]
19
+
20
+ That's it!
21
+
22
+ ### Load Errors
23
+
24
+ By default, cell load errors (e.g. if a date cell contains the string
25
+ 'hello') raise a SimpleXlsxReader::CellLoadError.
26
+
27
+ If you would like to provide better error feedback to your users, you
28
+ can set `SimpleXlsxReader.configuration.catch_cell_load_errors =
29
+ true`, and load errors will instead be inserted into Sheet#load_errors keyed
30
+ by [rownum, colnum].
31
+
32
+ ### More
33
+
34
+ Here's the totality of the public api, in code:
35
+
36
+ module SimpleXlsxReader
37
+ def self.open(file_path)
38
+ Document.new(file_path).tap(&:sheets)
39
+ end
40
+
41
+ class Document
42
+ attr_reader :file_path
43
+
44
+ def initialize(file_path)
45
+ @file_path = file_path
46
+ end
47
+
48
+ def sheets
49
+ @sheets ||= Mapper.new(xml).load_sheets
50
+ end
51
+
52
+ def to_hash
53
+ sheets.inject({}) {|acc, sheet| acc[sheet.name] = sheet.rows; acc}
54
+ end
55
+
56
+ def xml
57
+ Xml.load(file_path)
58
+ end
59
+
60
+ class Sheet < Struct.new(:name, :rows)
61
+ def headers
62
+ rows[0]
63
+ end
64
+
65
+ def data
66
+ rows[1..-1]
67
+ end
68
+
69
+ # Load errors will be a hash of the form:
70
+ # {
71
+ # [rownum, colnum] => '[error]'
72
+ # }
73
+ def load_errors
74
+ @load_errors ||= {}
75
+ end
76
+ end
77
+ end
78
+ end
79
+
80
+ ## Installation
81
+
82
+ Add this line to your application's Gemfile:
83
+
84
+ gem 'simple_xlsx_reader'
85
+
86
+ And then execute:
87
+
88
+ $ bundle
89
+
90
+ Or install it yourself as:
91
+
92
+ $ gem install simple_xlsx_reader
93
+
94
+ ## Contributing
95
+
96
+ 1. Fork it
97
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
98
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
99
+ 4. Push to the branch (`git push origin my-new-feature`)
100
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,8 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require 'rake/testtask'
4
+ Rake::TestTask.new do |t|
5
+ t.pattern = "test/**/*_test.rb"
6
+ end
7
+
8
+ task :default => [:test]
@@ -0,0 +1,354 @@
1
+ require "simple_xlsx_reader/version"
2
+ require 'nokogiri'
3
+ require 'zip/zip'
4
+ require 'zip/zipfilesystem'
5
+ require 'date'
6
+
7
+ module SimpleXlsxReader
8
+ class CellLoadError < StandardError; end
9
+
10
# Returns the module-wide settings object, creating it on first use.
#
# The object has a single member, +catch_cell_load_errors+, which starts
# out as +false+.
def self.configuration
  return @configuration if @configuration

  settings_class = Struct.new(:catch_cell_load_errors)
  @configuration = settings_class.new(false)
end
15
+
16
# Builds a Document for +file_path+, calls #sheets on it once, and returns
# the document.
def self.open(file_path)
  document = Document.new(file_path)
  document.sheets
  document
end
19
+
20
+ class Document
21
+ attr_reader :file_path
22
+
23
+ def initialize(file_path)
24
+ @file_path = file_path
25
+ end
26
+
27
# The workbook's sheets; built by Mapper on the first call and cached in
# @sheets afterwards.
def sheets
  return @sheets if @sheets

  @sheets = Mapper.new(xml).load_sheets
end
30
+
31
# Returns a hash that maps each sheet's name to that sheet's rows.
def to_hash
  sheets.each_with_object({}) do |sheet, rows_by_name|
    rows_by_name[sheet.name] = sheet.rows
  end
end
34
+
35
+ def xml
36
+ Xml.load(file_path)
37
+ end
38
+
39
# A single worksheet: its name plus its rows (arrays of cell values).
class Sheet < Struct.new(:name, :rows)
  # The first row, or nil when the sheet has no rows (or rows is unset).
  def headers
    Array(rows).first
  end

  # Every row after the first. Always an Array: empty when the sheet has
  # zero or one row, or when rows has not been assigned yet.
  def data
    Array(rows).drop(1)
  end

  # Load errors will be a hash of the form:
  # {
  #   [rownum, colnum] => '[error]'
  # }
  def load_errors
    @load_errors ||= {}
  end
end
56
+
57
##
# For internal use; stores source xml in nokogiri documents
class Xml
  attr_accessor :workbook, :shared_strings, :sheets, :styles

  # Opens the zip archive at +file_path+ and parses the workbook, styles,
  # optional shared strings and the consecutively numbered worksheet files
  # into a new Xml instance, which is returned.
  def self.load(file_path)
    container = new

    Zip::ZipFile.open(file_path) do |zip|
      container.workbook = Nokogiri::XML(zip.read('xl/workbook.xml'))
      container.styles = Nokogiri::XML(zip.read('xl/styles.xml'))

      # optional feature used by excel, but not often used by xlsx
      # generation libraries
      shared_path = 'xl/sharedStrings.xml'
      if zip.file.file?(shared_path)
        container.shared_strings = Nokogiri::XML(zip.read(shared_path))
      end

      # Worksheets are read as sheet1.xml, sheet2.xml, ... and reading
      # stops at the first number that has no file.
      container.sheets = []
      number = 1
      while zip.file.file?("xl/worksheets/sheet#{number}.xml")
        container.sheets <<
          Nokogiri::XML(zip.read("xl/worksheets/sheet#{number}.xml"))
        number += 1
      end
    end

    container
  end
end
87
+
88
+ ##
89
+ # For internal use; translates source xml to Sheet objects.
90
+ class Mapper < Struct.new(:xml)
91
# Builds one Sheet per entry of #sheet_toc, in table-of-contents order,
# pairing each sheet name with the worksheet xml at its index.
def load_sheets
  sheet_toc.map { |name, index| parse_sheet(name, xml.sheets[index]) }
end
96
+
97
# Table of contents for the sheets, ex. {'Authors' => 0, ...}
#
# Keys are the sheets' name attributes; values are sheetId - 1, which
# #load_sheets uses as an index into xml.sheets.
# NOTE(review): this assumes sheetId lines up with the sheetN.xml file
# numbering -- confirm for workbooks with deleted or reordered sheets.
def sheet_toc
  sheet_nodes = xml.workbook.xpath('/xmlns:workbook/xmlns:sheets/xmlns:sheet')

  sheet_nodes.each_with_object({}) do |node, toc|
    name = node.attributes['name'].value
    toc[name] = node.attributes['sheetId'].value.to_i - 1 # keep things 0-indexed
  end
end
108
+
109
# Converts one worksheet document into a Sheet named +sheet_name+.
#
# Each cell is cast according to its explicit 't' attribute or, failing
# that, the type derived from its 's' (style index) attribute. When a cast
# raises, the outcome depends on
# SimpleXlsxReader.configuration.catch_cell_load_errors: if false, a
# CellLoadError carrying the zero-based row/column is raised; if true, the
# message is stored in sheet.load_errors and the cell keeps its raw text.
def parse_sheet(sheet_name, xsheet)
  sheet = Sheet.new(sheet_name)
  xrows = xsheet.xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row")

  sheet.rows = xrows.each_with_index.map do |xrow, rownum|
    xrow.xpath('xmlns:c').each_with_index.map do |xcell, colnum|
      explicit_type = xcell.attributes['t']
      style_index = xcell.attributes['s']

      type = explicit_type && explicit_type.value
      # If not the above, attempt to determine from a custom style
      type ||= style_index && style_types[style_index.value.to_i]

      begin
        self.class.cast(xcell.text, type, shared_strings: shared_strings)
      rescue => e
        if SimpleXlsxReader.configuration.catch_cell_load_errors
          sheet.load_errors[[rownum, colnum]] = e.message

          xcell.text
        else
          error = CellLoadError.new(
            "Row #{rownum}, Col #{colnum}: #{e.message}")
          error.set_backtrace(e.backtrace)
          raise error
        end
      end
    end
  end

  sheet
end
146
+
147
# Excel doesn't record types for some cells, only its display style, so
# we have to back out the type from that style.
#
# Some of these styles can be determined from a known set (see NumFmtMap),
# while others are 'custom' and we have to make a best guess.
#
# This is the array of types corresponding to the styles a spreadsheet
# uses, and includes both the known style types and the custom styles.
#
# Note that the xml sheet cells that use this don't reference the
# numFmtId, but instead the array index of a style in the stored list of
# only the styles used in the spreadsheet (which can be either known or
# custom). Hence this style types array, rather than a map of numFmtId to
# type.
#
# An xf element without a numFmtId attribute yields whatever
# #style_type_by_num_fmt_id returns for nil, instead of raising.
def style_types
  @style_types ||=
    xml.styles.xpath('/xmlns:styleSheet/xmlns:cellXfs/xmlns:xf').map do |xstyle|
      num_fmt_id = xstyle.attributes['numFmtId']
      style_type_by_num_fmt_id(num_fmt_id && num_fmt_id.value)
    end
end
166
+
167
# Finds the type we think a style is; For example, fmtId 14 is a date
# style, so this would return :date
#
# +id+ is the numFmtId as a string (or anything responding to to_i);
# nil yields nil. Ids from 164 upwards are custom formats and are looked
# up in #custom_style_types; lower ids are looked up in NumFmtMap. Unknown
# ids yield nil.
def style_type_by_num_fmt_id(id)
  return nil if id.nil?

  id = id.to_i
  if id >= 164 # custom style, arg! (164 is the first custom id)
    custom_style_types[id]
  else # we should know this one
    NumFmtMap[id]
  end
end
179
+
180
# Map of the numFmtIds declared in the stylesheet's numFmts section (the
# custom styles) to our best guess at the type, ex. {165 => :date_time}
#
# Computed once and cached in @custom_style_types.
def custom_style_types
  return @custom_style_types if @custom_style_types

  num_fmts = xml.styles.xpath('/xmlns:styleSheet/xmlns:numFmts/xmlns:numFmt')

  @custom_style_types = num_fmts.each_with_object({}) do |xstyle, types|
    fmt_id = xstyle.attributes['numFmtId'].value.to_i
    format_code = xstyle.attributes['formatCode'].value

    types[fmt_id] = determine_custom_style_type(format_code)
  end
end
193
+
194
# This is the least deterministic part of reading xlsx files. Due to
# custom styles, you can't know for sure when a date is a date other than
# looking at its format and guessing. It's not impossible to guess right,
# though.
#
# http://stackoverflow.com/questions/4948998/determining-if-an-xlsx-cell-is-date-formatted-for-excel-2007-spreadsheets
#
# +string+ is the formatCode of a custom number format. Returns :float,
# :date_time or :unsupported.
def determine_custom_style_type(string)
  # Formats that open with '_' or with ' 0' are treated as plain numbers.
  # (The two-character prefix has to be tested with start_with?; comparing
  # it against the single character string[0] can never match.)
  return :float if string.start_with?('_', ' 0')

  # Quoted literals ("USD") and backslash-escaped characters are display
  # text, not date/time tokens, so drop them before looking for letters.
  tokens = string.gsub(/"[^"]*"/, '').gsub(/\\./, '')

  # Looks for one of ymdhis outside of meta-stuff like [Red]
  return :date_time if tokens =~ /(^|\])[^\[]*[ymdhis]/i

  :unsupported
end
209
+
210
##
# The heart of typecasting. The ruby type is determined either explicitly
# from the cell xml or implicitly from the cell style, and this
# method expects that work to have been done already. This, then,
# takes the type we determined it to be and casts the cell value
# to that type.
#
# types:
# - s: shared string (see #shared_string)
# - n: number (cast to a float)
# - b: boolean
# - str: string
# - inlineStr: string
# - ruby symbol: for when type has been determined by style
#
# options:
# - shared_strings: needed for 's' (shared string) type
#
# Returns nil for a nil or empty +value+, and +value+ unchanged for an
# unrecognised +type+. Date-like types raise ArgumentError when the part
# before the decimal point is not an integer.
def self.cast(value, type, options = {})
  return nil if value.nil? || value.empty?

  case type

  ##
  # There are few built-in types
  ##

  when 's' # shared string
    options[:shared_strings][value.to_i]
  when 'n' # number
    value.to_f
  when 'b'
    value.to_i == 1
  when 'str', 'inlineStr'
    value

  ##
  # Type can also be determined by a style,
  # detected earlier and cast here by its standardized symbol
  ##

  when :string, :unsupported
    value
  when :fixnum
    value.to_i
  when :float
    value.to_f
  when :percentage
    value.to_f / 100
  # the trickiest. note that all these formats can vary on
  # whether they actually contain a date, time, or datetime.
  when :date, :time, :date_time
    days_since_1900, fraction_of_24 = value.split('.')

    # http://stackoverflow.com/questions/10559767/how-to-convert-ms-excel-date-from-float-to-date-format-in-ruby
    date = Date.new(1899, 12, 30) + Integer(days_since_1900)

    if fraction_of_24 # there is a time associated
      # Round to the nearest second: truncating turns a stored 11:00
      # (0.4583333...) into 10:59 and throws the seconds away.
      seconds = ("0.#{fraction_of_24}".to_f * 86_400).round

      Time.utc(date.year, date.month, date.day) + seconds
    else
      date
    end
  when :bignum
    if defined?(BigDecimal)
      # Kernel#BigDecimal; BigDecimal.new is gone from current bigdecimal.
      BigDecimal(value)
    else
      value.to_f
    end

  ##
  # Beats me
  ##

  else
    value
  end
end
293
+
294
# Map of non-custom numFmtId to casting symbol.
# Frozen so the shared lookup table cannot be modified at runtime.
NumFmtMap = {
  0  => :string,      # General
  1  => :fixnum,      # 0
  2  => :float,       # 0.00
  3  => :fixnum,      # #,##0
  4  => :float,       # #,##0.00
  5  => :unsupported, # $#,##0_);($#,##0)
  6  => :unsupported, # $#,##0_);[Red]($#,##0)
  7  => :unsupported, # $#,##0.00_);($#,##0.00)
  8  => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
  9  => :percentage,  # 0%
  10 => :percentage,  # 0.00%
  11 => :bignum,      # 0.00E+00
  12 => :unsupported, # # ?/?
  13 => :unsupported, # # ??/??
  14 => :date,        # mm-dd-yy
  15 => :date,        # d-mmm-yy
  16 => :date,        # d-mmm
  17 => :date,        # mmm-yy
  18 => :time,        # h:mm AM/PM
  19 => :time,        # h:mm:ss AM/PM
  20 => :time,        # h:mm
  21 => :time,        # h:mm:ss
  22 => :date_time,   # m/d/yy h:mm
  37 => :unsupported, # #,##0 ;(#,##0)
  38 => :unsupported, # #,##0 ;[Red](#,##0)
  39 => :unsupported, # #,##0.00;(#,##0.00)
  40 => :unsupported, # #,##0.00;[Red](#,##0.00)
  45 => :time,        # mm:ss
  46 => :time,        # [h]:mm:ss
  47 => :time,        # mmss.0
  48 => :bignum,      # ##0.0E+0
  49 => :unsupported  # @
}.freeze
329
+
330
# For performance reasons, excel uses an optional SpreadsheetML feature
# that puts all strings in a separate xml file, and then references
# them by their index in that file.
#
# http://msdn.microsoft.com/en-us/library/office/gg278314.aspx
#
# Returns the strings in document order (an empty array when the workbook
# has no shared strings file); the result is cached in @shared_strings.
def shared_strings
  return @shared_strings if @shared_strings

  document = xml.shared_strings

  @shared_strings =
    if document
      document.xpath('/xmlns:sst/xmlns:si').map do |item|
        # a shared string can be a single value...
        plain = item.xpath('xmlns:t/text()').first

        if plain
          plain.text
        else
          # ... or a composite of separately styled words/characters
          item.xpath('xmlns:r/xmlns:t/text()').map(&:text).join
        end
      end
    else
      []
    end
end
350
+
351
+ end
352
+
353
+ end
354
+ end
@@ -0,0 +1,3 @@
1
+ module SimpleXlsxReader
2
+ VERSION = "0.0.1"
3
+ end
@@ -0,0 +1,24 @@
1
+ # -*- encoding: utf-8 -*-
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'simple_xlsx_reader/version'
5
+
6
+ Gem::Specification.new do |gem|
7
+ gem.name = "simple_xlsx_reader"
8
+ gem.version = SimpleXlsxReader::VERSION
9
+ gem.authors = ["Woody Peterson"]
10
+ gem.email = ["woody@sigby.com"]
11
+ gem.description = %q{Read xlsx data the Ruby way}
12
+ gem.summary = %q{Read xlsx data the Ruby way}
13
+ gem.homepage = ""
14
+
15
+ gem.add_dependency 'nokogiri'
16
+ gem.add_dependency 'rubyzip'
17
+
18
+ gem.add_development_dependency 'pry'
19
+
20
+ gem.files = `git ls-files`.split($/)
21
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
22
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
23
+ gem.require_paths = ["lib"]
24
+ end
Binary file
@@ -0,0 +1,80 @@
1
+ <sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="6" uniqueCount="5">
2
+ <si>
3
+ <t>Cell A1</t>
4
+ </si>
5
+ <si>
6
+ <t>Cell B1</t>
7
+ </si>
8
+ <si>
9
+ <t>My Cell</t>
10
+ </si>
11
+ <si>
12
+ <r>
13
+ <rPr>
14
+ <sz val="11"/>
15
+ <color rgb="FFFF0000"/>
16
+ <rFont val="Calibri"/>
17
+ <family val="2"/>
18
+ <scheme val="minor"/>
19
+ </rPr>
20
+ <t>Cell</t>
21
+ </r>
22
+ <r>
23
+ <rPr>
24
+ <sz val="11"/>
25
+ <color theme="1"/>
26
+ <rFont val="Calibri"/>
27
+ <family val="2"/>
28
+ <scheme val="minor"/>
29
+ </rPr>
30
+ <t xml:space="preserve"> </t>
31
+ </r>
32
+ <r>
33
+ <rPr>
34
+ <b/>
35
+ <sz val="11"/>
36
+ <color theme="1"/>
37
+ <rFont val="Calibri"/>
38
+ <family val="2"/>
39
+ <scheme val="minor"/>
40
+ </rPr>
41
+ <t>A2</t>
42
+ </r>
43
+ </si>
44
+ <si>
45
+ <r>
46
+ <rPr>
47
+ <sz val="11"/>
48
+ <color rgb="FF00B0F0"/>
49
+ <rFont val="Calibri"/>
50
+ <family val="2"/>
51
+ <scheme val="minor"/>
52
+ </rPr>
53
+ <t>Cell</t>
54
+ </r>
55
+ <r>
56
+ <rPr>
57
+ <sz val="11"/>
58
+ <color theme="1"/>
59
+ <rFont val="Calibri"/>
60
+ <family val="2"/>
61
+ <scheme val="minor"/>
62
+ </rPr>
63
+ <t xml:space="preserve"> </t>
64
+ </r>
65
+ <r>
66
+ <rPr>
67
+ <i/>
68
+ <sz val="11"/>
69
+ <color theme="1"/>
70
+ <rFont val="Calibri"/>
71
+ <family val="2"/>
72
+ <scheme val="minor"/>
73
+ </rPr>
74
+ <t>B2</t>
75
+ </r>
76
+ </si>
77
+ <si>
78
+ <t>Cell Fmt</t>
79
+ </si>
80
+ </sst>
@@ -0,0 +1,108 @@
1
+ require_relative 'test_helper'
2
+ require 'time'
3
+
4
+ describe SimpleXlsxReader do
5
+ let(:sesame_street_blog_file) { File.join(File.dirname(__FILE__),
6
+ 'sesame_street_blog.xlsx') }
7
+
8
+ let(:subject) { SimpleXlsxReader::Document.new(sesame_street_blog_file) }
9
+
10
+ describe '#to_hash' do
11
+ it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
12
+ subject.to_hash.must_equal({
13
+ "Authors"=>
14
+ [["Name", "Occupation"],
15
+ ["Big Bird", "Teacher"]],
16
+
17
+ "Posts"=>
18
+ [["Author Name", "Title", "Body", "Created At", "Comment Count"],
19
+ ["Big Bird", "The Number 1", "The Greatest", Time.parse("2002-01-01 11:00:00 UTC"), 1],
20
+ ["Big Bird", "The Number 2", "Second Best", Time.parse("2002-01-02 14:00:00 UTC"), 2]]
21
+ })
22
+ end
23
+ end
24
+
25
+ describe SimpleXlsxReader::Document::Mapper do
26
+ let(:described_class) { SimpleXlsxReader::Document::Mapper }
27
+
28
+ describe '::cast' do
29
+ it 'reads type s as a shared string' do
30
+ described_class.cast('1', 's', shared_strings: ['a', 'b', 'c']).
31
+ must_equal 'b'
32
+ end
33
+
34
+ it 'reads type inlineStr as a string' do
35
+ xml = Nokogiri::XML(%( <c t="inlineStr"><is><t>the value</t></is></c> ))
36
+ described_class.cast(xml.text, 'inlineStr').must_equal 'the value'
37
+ end
38
+ end
39
+
40
+ describe '#shared_strings' do
41
+ let(:xml) do
42
+ SimpleXlsxReader::Document::Xml.new.tap do |xml|
43
+ xml.shared_strings = Nokogiri::XML(File.read(
44
+ File.join(File.dirname(__FILE__), 'shared_strings.xml') ))
45
+ end
46
+ end
47
+
48
+ subject { described_class.new(xml) }
49
+
50
+ it 'parses strings formatted at the cell level' do
51
+ subject.shared_strings[0..2].must_equal ['Cell A1', 'Cell B1', 'My Cell']
52
+ end
53
+
54
+ it 'parses strings formatted at the character level' do
55
+ subject.shared_strings[3..5].must_equal ['Cell A2', 'Cell B2', 'Cell Fmt']
56
+ end
57
+ end
58
+
59
+ describe "parse errors" do
60
+ after do
61
+ SimpleXlsxReader.configuration.catch_cell_load_errors = false
62
+ end
63
+
64
+ let(:xml) do
65
+ SimpleXlsxReader::Document::Xml.new.tap do |xml|
66
+ xml.sheets = [Nokogiri::XML(
67
+ <<-XML
68
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
69
+ <sheetData>
70
+ <row>
71
+ <c s='0'>
72
+ <v>14 is a date style; this is not a date</v>
73
+ </c>
74
+ </row>
75
+ </sheetData>
76
+ </worksheet>
77
+ XML
78
+ )]
79
+
80
+ # s='0' above refers to the value of numFmtId at cellXfs index 0
81
+ xml.styles = Nokogiri::XML(
82
+ <<-XML
83
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
84
+ <cellXfs count="1">
85
+ <xf numFmtId="14" />
86
+ </cellXfs>
87
+ </styleSheet>
88
+ XML
89
+ )
90
+ end
91
+ end
92
+
93
+ it 'raises if configuration.raise_on_parse_error' do
94
+ SimpleXlsxReader.configuration.catch_cell_load_errors = false
95
+
96
+ lambda { described_class.new(xml).parse_sheet('test', xml.sheets.first) }.
97
+ must_raise(SimpleXlsxReader::CellLoadError)
98
+ end
99
+
100
+ it 'records a load error if not configuration.raise_on_parse_error' do
101
+ SimpleXlsxReader.configuration.catch_cell_load_errors = true
102
+
103
+ sheet = described_class.new(xml).parse_sheet('test', xml.sheets.first)
104
+ sheet.load_errors[[0,0]].must_include 'invalid value for Integer'
105
+ end
106
+ end
107
+ end
108
+ end
@@ -0,0 +1,7 @@
1
+ gem 'minitest'
2
+ require 'minitest/spec'
3
+ require 'minitest/autorun'
4
+ require 'pry'
5
+
6
+ $:.unshift File.expand_path("lib")
7
+ require 'simple_xlsx_reader'
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: simple_xlsx_reader
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Woody Peterson
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-01-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: nokogiri
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: rubyzip
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: pry
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ description: Read xlsx data the Ruby way
63
+ email:
64
+ - woody@sigby.com
65
+ executables: []
66
+ extensions: []
67
+ extra_rdoc_files: []
68
+ files:
69
+ - .gitignore
70
+ - Gemfile
71
+ - LICENSE.txt
72
+ - README.md
73
+ - Rakefile
74
+ - lib/simple_xlsx_reader.rb
75
+ - lib/simple_xlsx_reader/version.rb
76
+ - simple_xlsx_reader.gemspec
77
+ - test/sesame_street_blog.xlsx
78
+ - test/shared_strings.xml
79
+ - test/simple_xlsx_reader_test.rb
80
+ - test/test_helper.rb
81
+ homepage: ''
82
+ licenses: []
83
+ post_install_message:
84
+ rdoc_options: []
85
+ require_paths:
86
+ - lib
87
+ required_ruby_version: !ruby/object:Gem::Requirement
88
+ none: false
89
+ requirements:
90
+ - - ! '>='
91
+ - !ruby/object:Gem::Version
92
+ version: '0'
93
+ required_rubygems_version: !ruby/object:Gem::Requirement
94
+ none: false
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
98
+ version: '0'
99
+ requirements: []
100
+ rubyforge_project:
101
+ rubygems_version: 1.8.24
102
+ signing_key:
103
+ specification_version: 3
104
+ summary: Read xlsx data the Ruby way
105
+ test_files:
106
+ - test/sesame_street_blog.xlsx
107
+ - test/shared_strings.xml
108
+ - test/simple_xlsx_reader_test.rb
109
+ - test/test_helper.rb
110
+ has_rdoc: