simple_xlsx_reader 1.0.1 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 94422da0193805c579ba37c7c3e58b35a996dfbc
4
- data.tar.gz: a9c5e1f01acc0c60165a13adc1af087743a60935
2
+ SHA256:
3
+ metadata.gz: e2b04473235c5ed2c2764f62a627fa6f16816c36e0fcff3497be229f8666a0f7
4
+ data.tar.gz: 9367b0082f31e9cb208d9f97ed6cb67d5276a459562809460694602339dfdaad
5
5
  SHA512:
6
- metadata.gz: 33338f8fcf3c180ea346548061598953842358a21acd6d97bf451c07d8655f179af0cf7b7791f7c9de1a8411578e3623faab178b3cd74893aaf6d040a7abde96
7
- data.tar.gz: 50035b920f6811eed88c318c17b47bf8823aa1ac4bf114af3bc29174edcf08ebd5d16902177aa6a48b70f8e70a745249bb8494101f9f310f24d5f5d5bbc13f27
6
+ metadata.gz: cd42f7a0b8830a2f01703dca10ae779b973566ad25e3b74d31dc3693977fa5b2b3442e47bc1a3b50723bae3bb9f31facd923f1eaba06b51cc8b927e7fb207cf3
7
+ data.tar.gz: 38ecb026b0ad5a1985d88349a839a9d2972f85596504e6f300686f9751169a3c8d62582e79119106085a9cadc066517206da117993c3a30f48a5a0c58f256b4c
data/.travis.yml ADDED
@@ -0,0 +1,8 @@
1
+ language: ruby
2
+ cache: bundler
3
+ before_install:
4
+ - gem update bundler
5
+ rvm:
6
+ - 2.5.8
7
+ - 2.7.2
8
+ - 3.0.0
data/CHANGELOG.md CHANGED
@@ -1,3 +1,26 @@
1
+ ### 1.0.5
2
+
3
+ * Support string or IO input via `SimpleXlsxReader.parse` (@kalsan, @til)
4
+
5
+ ### 1.0.4
6
+
7
+ * Fix Windows + RubyZip 1.2.1 bug preventing files from being read
8
+ * Add ability to parse hyperlinks
9
+ * Support files exported from Google Docs (@Strnadj)
10
+
11
+ ### 1.0.3
12
+
13
+ Broken on Ruby 1.9; yanked.
14
+
15
+ ### 1.0.2
16
+
17
+ * Fix Ruby 1.9.3-specific bug preventing parsing most sheets [middagj, eritiro]
18
+ * Better support for non-excel-generated xlsx files [bwlang]
19
+ * You don't always have a numFmtId column, and that's OK
20
+ * Sometimes 'sharedStrings.xml' can be 'sharedstrings.xml'
21
+ * Fixed parsing times very close to 12/30/1899 [Valeriy Utyaganov]
22
+ * Be more flexible with custom formats using a numFmtId < 164
23
+
1
24
  ### 1.0.1
2
25
 
3
26
  * Add support for the 1904 date system [zilverline]
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # SimpleXlsxReader
1
+ # SimpleXlsxReader [![Build Status](https://travis-ci.org/woahdae/simple_xlsx_reader.svg?branch=master)](https://travis-ci.org/woahdae/simple_xlsx_reader)
2
2
 
3
3
  An xlsx reader for Ruby that parses xlsx cell values into plain ruby
4
4
  primitives and dates/times.
@@ -35,14 +35,21 @@ Here's the totality of the public api, in code:
35
35
 
36
36
  module SimpleXlsxReader
37
37
  def self.open(file_path)
38
- Document.new(file_path).tap(&:sheets)
38
+ Document.new(file_path: file_path).tap(&:sheets)
39
+ end
40
+
41
+ def self.parse(string_or_io)
42
+ Document.new(string_or_io: string_or_io).tap(&:sheets)
39
43
  end
40
44
 
41
45
  class Document
42
- attr_reader :file_path
46
+ attr_reader :string_or_io
47
+
48
+ def initialize(legacy_file_path = nil, file_path: nil, string_or_io: nil)
49
+ ((file_path || legacy_file_path).nil? ^ string_or_io.nil?) ||
50
+ fail(ArgumentError, 'either file_path or string_or_io must be provided')
43
51
 
44
- def initialize(file_path)
45
- @file_path = file_path
52
+ @string_or_io = string_or_io || File.new(file_path || legacy_file_path)
46
53
  end
47
54
 
48
55
  def sheets
@@ -54,7 +61,7 @@ Here's the totality of the public api, in code:
54
61
  end
55
62
 
56
63
  def xml
57
- Xml.load(file_path)
64
+ Xml.load(string_or_io)
58
65
  end
59
66
 
60
67
  class Sheet < Struct.new(:name, :rows)
@@ -1,3 +1,3 @@
1
1
  module SimpleXlsxReader
2
- VERSION = "1.0.1"
2
+ VERSION = "1.0.5"
3
3
  end
@@ -19,6 +19,33 @@ end
19
19
  module SimpleXlsxReader
20
20
  class CellLoadError < StandardError; end
21
21
 
22
+ # We support hyperlinks as a "type" even though they're technically
23
+ # represented either as a function or an external reference in the xlsx spec.
24
+ #
25
+ # Since having hyperlink data in our sheet usually means we might want to do
26
+ # something primarily with the URL (store it in the database, download it, etc),
27
+ # we go through extra effort to parse the function or follow the reference
28
+ # to represent the hyperlink primarily as a URL. However, maybe we do want
29
+ # the hyperlink "friendly name" part (as MS calls it), so here we've subclassed
30
+ # string to tack on the friendly name. This means 80% of us that just want
31
+ # the URL value will have to do nothing extra, but the 20% that might want the
32
+ # friendly name can access it.
33
+ #
34
+ # Note, by default, the value we would get by just asking the cell would
35
+ # be the "friendly name" and *not* the URL, which is tucked away in the
36
+ # function definition or a separate "relationships" meta-document.
37
+ #
38
+ # See MS documentation on the HYPERLINK function for some background:
39
+ # https://support.office.com/en-us/article/HYPERLINK-function-333c7ce6-c5ae-4164-9c47-7de9b76f577f
40
+ class Hyperlink < String
41
+ attr_reader :friendly_name
42
+
43
+ def initialize(url, friendly_name = nil)
44
+ @friendly_name = friendly_name
45
+ super(url)
46
+ end
47
+ end
48
+
22
49
  def self.configuration
23
50
  @configuration ||= Struct.new(:catch_cell_load_errors).new.tap do |c|
24
51
  c.catch_cell_load_errors = false
@@ -26,14 +53,21 @@ module SimpleXlsxReader
26
53
  end
27
54
 
28
55
  def self.open(file_path)
29
- Document.new(file_path).tap(&:sheets)
56
+ Document.new(file_path: file_path).tap(&:sheets)
57
+ end
58
+
59
+ def self.parse(string_or_io)
60
+ Document.new(string_or_io: string_or_io).tap(&:sheets)
30
61
  end
31
62
 
32
63
  class Document
33
- attr_reader :file_path
64
+ attr_reader :string_or_io
65
+
66
+ def initialize(legacy_file_path = nil, file_path: nil, string_or_io: nil)
67
+ ((file_path || legacy_file_path).nil? ^ string_or_io.nil?) ||
68
+ fail(ArgumentError, 'either file_path or string_or_io must be provided')
34
69
 
35
- def initialize(file_path)
36
- @file_path = file_path
70
+ @string_or_io = string_or_io || File.new(file_path || legacy_file_path)
37
71
  end
38
72
 
39
73
  def sheets
@@ -45,7 +79,7 @@ module SimpleXlsxReader
45
79
  end
46
80
 
47
81
  def xml
48
- Xml.load(file_path)
82
+ Xml.load(string_or_io)
49
83
  end
50
84
 
51
85
  class Sheet < Struct.new(:name, :rows)
@@ -69,28 +103,54 @@ module SimpleXlsxReader
69
103
  ##
70
104
  # For internal use; stores source xml in nokogiri documents
71
105
  class Xml
72
- attr_accessor :workbook, :shared_strings, :sheets, :styles
106
+ attr_accessor :workbook, :shared_strings, :sheets, :sheet_rels, :styles
73
107
 
74
- def self.load(file_path)
108
+ def self.load(string_or_io)
75
109
  self.new.tap do |xml|
76
- SimpleXlsxReader::Zip.open(file_path) do |zip|
77
- xml.workbook = Nokogiri::XML(zip.read('xl/workbook.xml')).remove_namespaces!
78
- xml.styles = Nokogiri::XML(zip.read('xl/styles.xml')).remove_namespaces!
79
-
80
- # optional feature used by excel, but not often used by xlsx
81
- # generation libraries
82
- if zip.file.file?('xl/sharedStrings.xml')
83
- xml.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml')).remove_namespaces!
84
- end
85
-
110
+ SimpleXlsxReader::Zip.open_buffer(string_or_io) do |zip|
86
111
  xml.sheets = []
87
- i = 0
88
- loop do
89
- i += 1
90
- break if !zip.file.file?("xl/worksheets/sheet#{i}.xml")
112
+ xml.sheet_rels = []
113
+
114
+ # This weird style of enumerating over the entries lets us
115
+ # concisely assign entries in a case insensitive and
116
+ # slash insensitive ('/' vs '\') manner.
117
+ #
118
+ # RubyZip used to normalize the slashes, but doesn't now:
119
+ # https://github.com/rubyzip/rubyzip/issues/324
120
+ zip.entries.each do |entry|
121
+ if entry.name.match(/^xl.workbook\.xml$/) # xl/workbook.xml
122
+ xml.workbook = Nokogiri::XML(zip.read(entry)).remove_namespaces!
123
+ elsif entry.name.match(/^xl.styles\.xml$/) # xl/styles.xml
124
+ xml.styles = Nokogiri::XML(zip.read(entry)).remove_namespaces!
125
+ elsif entry.name.match(/^xl.sharedStrings\.xml$/i) # xl/sharedStrings.xml
126
+ # optional feature used by excel, but not often used by xlsx
127
+ # generation libraries. Path name is sometimes lowercase, too.
128
+ xml.shared_strings = Nokogiri::XML(zip.read(entry)).remove_namespaces!
129
+ elsif match = entry.name.match(/^xl.worksheets.sheet([0-9]*)\.xml$/)
130
+ sheet_number = match.captures.first.to_i
131
+ xml.sheets[sheet_number] =
132
+ Nokogiri::XML(zip.read(entry)).remove_namespaces!
133
+ elsif match = entry.name.match(/^xl.worksheets._rels.sheet([0-9]*)\.xml\.rels$/)
134
+ sheet_number = match.captures.first.to_i
135
+ xml.sheet_rels[sheet_number] =
136
+ Nokogiri::XML(zip.read(entry)).remove_namespaces!
137
+ end
138
+ end
91
139
 
92
- xml.sheets <<
93
- Nokogiri::XML(zip.read("xl/worksheets/sheet#{i}.xml")).remove_namespaces!
140
+ # Sometimes there's a zero-index sheet.xml, ex.
141
+ # Google Docs creates:
142
+ #
143
+ # xl/worksheets/sheet.xml
144
+ # xl/worksheets/sheet1.xml
145
+ # xl/worksheets/sheet2.xml
146
+ # While Excel creates:
147
+ # xl/worksheets/sheet1.xml
148
+ # xl/worksheets/sheet2.xml
149
+ #
150
+ # So, for the latter case, let's shift the leading nil off of [nil, <Sheet 1>, <Sheet 2>]
151
+ if !xml.sheets[0]
152
+ xml.sheets.shift
153
+ xml.sheet_rels.shift
94
154
  end
95
155
  end
96
156
  end
@@ -105,7 +165,7 @@ module SimpleXlsxReader
105
165
 
106
166
  def load_sheets
107
167
  sheet_toc.each_with_index.map do |(sheet_name, _sheet_number), i|
108
- parse_sheet(sheet_name, xml.sheets[i]) # sheet_number is *not* the index into xml.sheets
168
+ parse_sheet(sheet_name, xml.sheets[i], xml.sheet_rels[i]) # sheet_number is *not* the index into xml.sheets
109
169
  end
110
170
  end
111
171
 
@@ -121,9 +181,10 @@ module SimpleXlsxReader
121
181
  end
122
182
  end
123
183
 
124
- def parse_sheet(sheet_name, xsheet)
184
+ def parse_sheet(sheet_name, xsheet, xrels)
125
185
  sheet = Sheet.new(sheet_name)
126
186
  sheet_width, sheet_height = *sheet_dimensions(xsheet)
187
+ cells_w_links = xsheet.xpath('//hyperlinks/hyperlink').inject({}) {|acc, e| acc[e.attr(:ref)] = e.attr(:id); acc}
127
188
 
128
189
  sheet.rows = Array.new(sheet_height) { Array.new(sheet_width) }
129
190
  xsheet.xpath("/worksheet/sheetData/row/c").each do |xcell|
@@ -148,10 +209,21 @@ module SimpleXlsxReader
148
209
  # by about 60%. Odd.
149
210
  xvalue = type == 'inlineStr' ?
150
211
  (xis = xcell.children.find {|c| c.name == 'is'}) && xis.children.find {|c| c.name == 't'} :
151
- xcell.children.find {|c| c.name == 'v'}
212
+ xcell.children.find {|c| c.name == 'f' && c.text.start_with?('HYPERLINK(') || c.name == 'v'}
213
+
214
+ if xvalue
215
+ value = xvalue.text.strip
216
+
217
+ if rel_id = cells_w_links[xcell.attr('r')] # a hyperlink made via GUI
218
+ url = xrels.at_xpath(%(//*[@Id="#{rel_id}"])).attr('Target')
219
+ elsif xvalue.name == 'f' # only time we have a function is if it's a hyperlink
220
+ url = value.slice(/HYPERLINK\("(.*?)"/, 1)
221
+ end
222
+ end
152
223
 
153
224
  cell = begin
154
- self.class.cast(xvalue && xvalue.text.strip, type, style,
225
+ self.class.cast(value, type, style,
226
+ :url => url,
155
227
  :shared_strings => shared_strings,
156
228
  :base_date => base_date)
157
229
  rescue => e
@@ -218,11 +290,13 @@ module SimpleXlsxReader
218
290
  # 'ABA' = 26 * 26 * 1 + 26 * 2 + 1
219
291
  # 'BZA' = 26 * 26 * 2 + 26 * 26 + 1
220
292
  def column_letter_to_number(column_letter)
221
- pow = -1
222
- column_letter.codepoints.reverse.inject(0) do |acc, charcode|
223
- pow += 1
224
- acc + 26**pow * (charcode - 64)
293
+ pow = column_letter.length - 1
294
+ result = 0
295
+ column_letter.each_byte do |b|
296
+ result += 26**pow * (b - 64)
297
+ pow -= 1
225
298
  end
299
+ result
226
300
  end
227
301
 
228
302
  # Excel doesn't record types for some cells, only its display style, so
@@ -241,21 +315,32 @@ module SimpleXlsxReader
241
315
  # type.
242
316
  def style_types
243
317
  @style_types ||=
244
- xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
245
- style_type_by_num_fmt_id(xstyle.attributes['numFmtId'].value)}
318
+ xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
319
+ style_type_by_num_fmt_id(num_fmt_id(xstyle))}
320
+ end
321
+
322
+ # Returns the numFmtId value if it's available, nil otherwise
323
+ def num_fmt_id(xstyle)
324
+ if xstyle.attributes['numFmtId']
325
+ xstyle.attributes['numFmtId'].value
326
+ else
327
+ nil
328
+ end
246
329
  end
247
330
 
248
331
  # Finds the type we think a style is; For example, fmtId 14 is a date
249
- # style, so this would return :date
332
+ # style, so this would return :date.
333
+ #
334
+ # Note, custom styles usually (are supposed to?) have a numFmtId >= 164,
335
+ # but in practice can sometimes be simply out of the usual "Any Language"
336
+ # id range that goes up to 49. For example, I have seen a numFmtId of
337
+ # 59 specified as a date. In Thai, 59 is a number format, so this seems
338
+ # like a bad idea, but we try to be flexible and just go with it.
250
339
  def style_type_by_num_fmt_id(id)
251
340
  return nil if id.nil?
252
341
 
253
342
  id = id.to_i
254
- if id >= 164 # custom style, arg!
255
- custom_style_types[id]
256
- else # we should know this one
257
- NumFmtMap[id]
258
- end
343
+ NumFmtMap[id] || custom_style_types[id]
259
344
  end
260
345
 
261
346
  # Map of (numFmtId >= 164) (custom styles) to our best guess at the type
@@ -314,7 +399,7 @@ module SimpleXlsxReader
314
399
  type = style
315
400
  end
316
401
 
317
- case type
402
+ casted = case type
318
403
 
319
404
  ##
320
405
  # There are few built-in types
@@ -347,15 +432,15 @@ module SimpleXlsxReader
347
432
  # the trickiest. note that all these formats can vary on
348
433
  # whether they actually contain a date, time, or datetime.
349
434
  when :date, :time, :date_time
350
- days_since_date_system_start, fraction_of_24 = value.split('.')
435
+ value = Float(value)
436
+ days_since_date_system_start = value.to_i
437
+ fraction_of_24 = value - days_since_date_system_start
351
438
 
352
439
  # http://stackoverflow.com/questions/10559767/how-to-convert-ms-excel-date-from-float-to-date-format-in-ruby
353
- date = options.fetch(:base_date, DATE_SYSTEM_1900) + Integer(days_since_date_system_start)
354
-
355
- if fraction_of_24 # there is a time associated
356
- fraction_of_24 = "0.#{fraction_of_24}".to_f
357
- seconds = (fraction_of_24 * 86400).round
440
+ date = options.fetch(:base_date, DATE_SYSTEM_1900) + days_since_date_system_start
358
441
 
442
+ if fraction_of_24 > 0 # there is a time associated
443
+ seconds = (fraction_of_24 * 86400).round
359
444
  return Time.utc(date.year, date.month, date.day) + seconds
360
445
  else
361
446
  return date
@@ -374,6 +459,12 @@ module SimpleXlsxReader
374
459
  else
375
460
  value
376
461
  end
462
+
463
+ if options[:url]
464
+ Hyperlink.new(options[:url], casted)
465
+ else
466
+ casted
467
+ end
377
468
  end
378
469
 
379
470
  ## Returns the base_date from which to calculate dates.
@@ -7,19 +7,21 @@ Gem::Specification.new do |gem|
7
7
  gem.name = "simple_xlsx_reader"
8
8
  gem.version = SimpleXlsxReader::VERSION
9
9
  gem.authors = ["Woody Peterson"]
10
- gem.email = ["woody@sigby.com"]
10
+ gem.email = ["woody.peterson@gmail.com"]
11
11
  gem.description = %q{Read xlsx data the Ruby way}
12
12
  gem.summary = %q{Read xlsx data the Ruby way}
13
13
  gem.homepage = ""
14
+ gem.license = "MIT"
14
15
 
15
16
  gem.add_dependency 'nokogiri'
16
17
  gem.add_dependency 'rubyzip'
17
18
 
18
19
  gem.add_development_dependency 'minitest', '>= 5.0'
20
+ gem.add_development_dependency 'rake'
19
21
  gem.add_development_dependency 'pry'
20
22
 
21
23
  gem.files = `git ls-files`.split($/)
22
24
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
23
- gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
25
+ gem.test_files = gem.files.grep(%r{^test/})
24
26
  gem.require_paths = ["lib"]
25
27
  end
@@ -1,4 +1,4 @@
1
- require 'test_helper'
1
+ require_relative 'test_helper'
2
2
 
3
3
  describe SimpleXlsxReader do
4
4
  let(:date1904_file) { File.join(File.dirname(__FILE__), 'date1904.xlsx') }
@@ -1,4 +1,4 @@
1
- require 'test_helper'
1
+ require_relative 'test_helper'
2
2
 
3
3
  describe SimpleXlsxReader do
4
4
  let(:datetimes_file) { File.join(File.dirname(__FILE__),
@@ -11,7 +11,8 @@ describe SimpleXlsxReader do
11
11
  "Datetimes" =>
12
12
  [[Time.parse("2013-08-19 18:29:59 UTC")],
13
13
  [Time.parse("2013-08-19 18:30:00 UTC")],
14
- [Time.parse("2013-08-19 18:30:01 UTC")]]
14
+ [Time.parse("2013-08-19 18:30:01 UTC")],
15
+ [Time.parse("1899-12-30 00:30:00 UTC")]]
15
16
  })
16
17
  end
17
18
 
data/test/datetimes.xlsx CHANGED
Binary file
Binary file
@@ -0,0 +1,15 @@
1
+ require_relative 'test_helper'
2
+ require 'time'
3
+
4
+ describe SimpleXlsxReader do
5
+ let(:one_sheet_file) { File.join(File.dirname(__FILE__), 'gdocs_sheet.xlsx') }
6
+ let(:subject) { SimpleXlsxReader::Document.new(one_sheet_file) }
7
+
8
+ it 'able to load file from google docs' do
9
+ subject.to_hash.must_equal({
10
+ "List 1" => [["Empty gdocs list 1"]],
11
+ "List 2" => [["Empty gdocs list 2"]]
12
+ })
13
+ end
14
+
15
+ end
Binary file
@@ -0,0 +1,15 @@
1
+ require_relative 'test_helper'
2
+
3
+ describe SimpleXlsxReader do
4
+ let(:lower_case_shared_strings) { File.join(File.dirname(__FILE__),
5
+ 'lower_case_sharedstrings.xlsx') }
6
+
7
+ let(:subject) { SimpleXlsxReader::Document.new(lower_case_shared_strings) }
8
+
9
+
10
+ describe '#to_hash' do
11
+ it 'should have the word Well in the first row' do
12
+ subject.sheets.first.rows[0].must_include('Well')
13
+ end
14
+ end
15
+ end
@@ -1,4 +1,4 @@
1
- require 'test_helper'
1
+ require_relative 'test_helper'
2
2
  require 'minitest/benchmark'
3
3
 
4
4
  describe 'SimpleXlsxReader Benchmark' do
@@ -96,13 +96,13 @@ describe 'SimpleXlsxReader Benchmark' do
96
96
  bench_exp(1,10000)
97
97
  end
98
98
 
99
- bench_performance_linear 'parses sheets in linear time', 0.9999 do |n|
99
+ bench_performance_linear 'parses sheets in linear time', 0.999 do |n|
100
100
 
101
101
  raise "not enough sample data; asked for #{n}, only have #{@xml.sheets.size}"\
102
102
  if @xml.sheets[n].nil?
103
103
 
104
104
  sheet = SimpleXlsxReader::Document::Mapper.new(@xml).
105
- parse_sheet('test', @xml.sheets[n])
105
+ parse_sheet('test', @xml.sheets[n], nil)
106
106
 
107
107
  raise "sheet didn't parse correctly; expected #{n + 1} rows, got #{sheet.rows.size}"\
108
108
  if sheet.rows.size != n + 1
Binary file
@@ -1,26 +1,66 @@
1
- require 'test_helper'
1
+ require_relative 'test_helper'
2
2
  require 'time'
3
3
 
4
+ SXR = SimpleXlsxReader
5
+
4
6
  describe SimpleXlsxReader do
5
- let(:sesame_street_blog_file) { File.join(File.dirname(__FILE__),
6
- 'sesame_street_blog.xlsx') }
7
-
8
- let(:subject) { SimpleXlsxReader::Document.new(sesame_street_blog_file) }
9
-
10
- describe '#to_hash' do
11
- it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
12
- subject.to_hash.must_equal({
13
- "Authors"=>
14
- [["Name", "Occupation"],
15
- ["Big Bird", "Teacher"]],
16
-
17
- "Posts"=>
18
- [["Author Name", "Title", "Body", "Created At", "Comment Count"],
19
- ["Big Bird", "The Number 1", "The Greatest", Time.parse("2002-01-01 11:00:00 UTC"), 1],
20
- ["Big Bird", "The Number 2", "Second Best", Time.parse("2002-01-02 14:00:00 UTC"), 2],
21
- ["Big Bird", "Formula Dates", "Tricky tricky", Time.parse("2002-01-03 14:00:00 UTC"), 0],
22
- ["Empty Eagress", nil, "The title, date, and comment have types, but no values", nil, nil]]
23
- })
7
+ let(:sesame_street_blog_file_path) { File.join(File.dirname(__FILE__), 'sesame_street_blog.xlsx') }
8
+ let(:sesame_street_blog_io) { File.new(sesame_street_blog_file_path) }
9
+ let(:expected_result) do
10
+ {
11
+ "Authors"=>
12
+ [["Name", "Occupation"],
13
+ ["Big Bird", "Teacher"]],
14
+ "Posts"=>
15
+ [["Author Name", "Title", "Body", "Created At", "Comment Count", "URL"],
16
+ ["Big Bird", "The Number 1", "The Greatest", Time.parse("2002-01-01 11:00:00 UTC"), 1, SXR::Hyperlink.new("http://www.example.com/hyperlink-function", "This uses the HYPERLINK() function")],
17
+ ["Big Bird", "The Number 2", "Second Best", Time.parse("2002-01-02 14:00:00 UTC"), 2, SXR::Hyperlink.new("http://www.example.com/hyperlink-gui", "This uses the hyperlink GUI option")],
18
+ ["Big Bird", "Formula Dates", "Tricky tricky", Time.parse("2002-01-03 14:00:00 UTC"), 0, nil],
19
+ ["Empty Eagress", nil, "The title, date, and comment have types, but no values", nil, nil, nil]]
20
+ }
21
+ end
22
+
23
+ describe SimpleXlsxReader do
24
+ describe 'load from file path' do
25
+ let(:subject) { SimpleXlsxReader.open(sesame_street_blog_file_path) }
26
+
27
+ it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
28
+ subject.to_hash.must_equal(expected_result)
29
+ end
30
+ end
31
+
32
+ describe 'load from buffer' do
33
+ let(:subject) { SimpleXlsxReader.parse(sesame_street_blog_io) }
34
+
35
+ it 'reads an xlsx buffer into a hash of {[sheet name] => [data]}' do
36
+ subject.to_hash.must_equal(expected_result)
37
+ end
38
+ end
39
+ end
40
+
41
+ describe SimpleXlsxReader::Document do
42
+ describe 'load from file path' do
43
+ let(:subject) { SimpleXlsxReader::Document.new(file_path: sesame_street_blog_file_path) }
44
+
45
+ it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
46
+ subject.to_hash.must_equal(expected_result)
47
+ end
48
+ end
49
+
50
+ describe 'load from buffer' do
51
+ let(:subject) { SimpleXlsxReader::Document.new(string_or_io: sesame_street_blog_io) }
52
+
53
+ it 'reads an xlsx buffer into a hash of {[sheet name] => [data]}' do
54
+ subject.to_hash.must_equal(expected_result)
55
+ end
56
+ end
57
+
58
+ describe 'load from file path (legacy API)' do
59
+ let(:subject) { SimpleXlsxReader::Document.new(sesame_street_blog_file_path) }
60
+
61
+ it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
62
+ subject.to_hash.must_equal(expected_result)
63
+ end
24
64
  end
25
65
  end
26
66
 
@@ -63,10 +103,33 @@ describe SimpleXlsxReader do
63
103
  must_equal Time.parse('2013-08-19 18:30 UTC')
64
104
  end
65
105
 
106
+ it 'reads less-than-zero complex number types styled as times' do
107
+ described_class.cast('6.25E-2', 'n', :time).
108
+ must_equal Time.parse('1899-12-30 01:30:00 UTC')
109
+ end
110
+
66
111
  it 'reads number types styled as date_times' do
67
112
  described_class.cast('41505.77083', 'n', :date_time).
68
113
  must_equal Time.parse('2013-08-19 18:30 UTC')
69
114
  end
115
+
116
+ it 'raises when date-styled values are not numerical' do
117
+ lambda { described_class.cast('14 is not a valid date', nil, :date) }.
118
+ must_raise(ArgumentError)
119
+ end
120
+
121
+ describe "with the url option" do
122
+ let(:url) { "http://www.example.com/hyperlink" }
123
+ it 'creates a hyperlink with a string type' do
124
+ described_class.cast("A link", 'str', :string, url: url).
125
+ must_equal SXR::Hyperlink.new(url, "A link")
126
+ end
127
+
128
+ it 'creates a hyperlink with a shared string type' do
129
+ described_class.cast("2", 's', nil, shared_strings: ['a','b','c'], url: url).
130
+ must_equal SXR::Hyperlink.new(url, 'c')
131
+ end
132
+ end
70
133
  end
71
134
 
72
135
  describe '#shared_strings' do
@@ -102,6 +165,13 @@ describe SimpleXlsxReader do
102
165
 
103
166
  it 'reads custom formatted styles (numFmtId >= 164)' do
104
167
  mapper.style_types[1].must_equal :date_time
168
+ mapper.custom_style_types[164].must_equal :date_time
169
+ end
170
+
171
+ # something I've seen in the wild; don't think it's correct, but let's be flexible.
172
+ it 'reads custom formatted styles given an id < 164, but not explicitly defined in the SpreadsheetML spec' do
173
+ mapper.style_types[2].must_equal :date_time
174
+ mapper.custom_style_types[59].must_equal :date_time
105
175
  end
106
176
  end
107
177
 
@@ -246,16 +316,55 @@ describe SimpleXlsxReader do
246
316
  it 'raises if configuration.catch_cell_load_errors' do
247
317
  SimpleXlsxReader.configuration.catch_cell_load_errors = false
248
318
 
249
- lambda { described_class.new(xml).parse_sheet('test', xml.sheets.first) }.
319
+ lambda { described_class.new(xml).parse_sheet('test', xml.sheets.first, nil) }.
250
320
  must_raise(SimpleXlsxReader::CellLoadError)
251
321
  end
252
322
 
253
323
  it 'records a load error if not configuration.catch_cell_load_errors' do
254
324
  SimpleXlsxReader.configuration.catch_cell_load_errors = true
255
325
 
256
- sheet = described_class.new(xml).parse_sheet('test', xml.sheets.first)
257
- sheet.load_errors[[0,0]].must_include 'invalid value for Integer'
326
+ sheet = described_class.new(xml).parse_sheet('test', xml.sheets.first, nil)
327
+ sheet.load_errors[[0,0]].must_include 'invalid value for Float'
328
+ end
329
+ end
330
+
331
+ describe "missing numFmtId attributes" do
332
+
333
+ let(:xml) do
334
+ SimpleXlsxReader::Document::Xml.new.tap do |xml|
335
+ xml.sheets = [Nokogiri::XML(
336
+ <<-XML
337
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
338
+ <dimension ref="A1:A1" />
339
+ <sheetData>
340
+ <row>
341
+ <c r='A1' s='s'>
342
+ <v>some content</v>
343
+ </c>
344
+ </row>
345
+ </sheetData>
346
+ </worksheet>
347
+ XML
348
+ ).remove_namespaces!]
349
+
350
+ xml.styles = Nokogiri::XML(
351
+ <<-XML
352
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
353
+
354
+ </styleSheet>
355
+ XML
356
+ ).remove_namespaces!
357
+ end
358
+ end
359
+
360
+ before do
361
+ @row = described_class.new(xml).parse_sheet('test', xml.sheets.first, nil).rows[0]
258
362
  end
363
+
364
+ it 'continues even when cells are missing numFmtId attributes ' do
365
+ @row[0].must_equal 'some content'
366
+ end
367
+
259
368
  end
260
369
 
261
370
  describe 'parsing types' do
@@ -284,8 +393,21 @@ describe SimpleXlsxReader do
284
393
  <c r='G1' t='inlineStr' s='0'>
285
394
  <is><t>Cell G1</t></is>
286
395
  </c>
396
+
397
+ <c r='H1' s='0'>
398
+ <f>HYPERLINK("http://www.example.com/hyperlink-function", "HYPERLINK function")</f>
399
+ <v>HYPERLINK function</v>
400
+ </c>
401
+
402
+ <c r='I1' s='0'>
403
+ <v>GUI-made hyperlink</v>
404
+ </c>
287
405
  </row>
288
406
  </sheetData>
407
+
408
+ <hyperlinks>
409
+ <hyperlink ref="I1" id="rId1"/>
410
+ </hyperlinks>
289
411
  </worksheet>
290
412
  XML
291
413
  ).remove_namespaces!]
@@ -303,11 +425,28 @@ describe SimpleXlsxReader do
303
425
  </styleSheet>
304
426
  XML
305
427
  ).remove_namespaces!
428
+
429
+ # Although not a "type" or "style" according to xlsx spec,
430
+ # it sure could/should be, so let's test it with the rest of our
431
+ # typecasting code.
432
+ xml.sheet_rels = [Nokogiri::XML(
433
+ <<-XML
434
+ <Relationships>
435
+ <Relationship
436
+ Id="rId1"
437
+ Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
438
+ Target="http://www.example.com/hyperlink-gui"
439
+ TargetMode="External"
440
+ />
441
+ </Relationships>
442
+ XML
443
+ ).remove_namespaces!]
444
+
306
445
  end
307
446
  end
308
447
 
309
448
  before do
310
- @row = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows[0]
449
+ @row = described_class.new(xml).parse_sheet('test', xml.sheets.first, xml.sheet_rels.first).rows[0]
311
450
  end
312
451
 
313
452
  it "reads 'Generic' cells as strings" do
@@ -341,6 +480,18 @@ describe SimpleXlsxReader do
341
480
  it "reads strings formatted as inlineStr" do
342
481
  @row[6].must_equal 'Cell G1'
343
482
  end
483
+
484
+ it "reads hyperlinks created via HYPERLINK()" do
485
+ @row[7].must_equal(
486
+ SXR::Hyperlink.new(
487
+ "http://www.example.com/hyperlink-function", "HYPERLINK function"))
488
+ end
489
+
490
+ it "reads hyperlinks created via the GUI" do
491
+ @row[8].must_equal(
492
+ SXR::Hyperlink.new(
493
+ "http://www.example.com/hyperlink-gui", "GUI-made hyperlink"))
494
+ end
344
495
  end
345
496
 
346
497
  describe 'parsing documents with blank rows' do
@@ -389,7 +540,7 @@ describe SimpleXlsxReader do
389
540
  end
390
541
 
391
542
  before do
392
- @rows = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows
543
+ @rows = described_class.new(xml).parse_sheet('test', xml.sheets.first, nil).rows
393
544
  end
394
545
 
395
546
  it "reads row data despite gaps in row numbering" do
data/test/styles.xml CHANGED
@@ -1,6 +1,7 @@
1
1
  <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2
2
  <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac" mc:Ignorable="x14ac">
3
- <numFmts count="1">
3
+ <numFmts count="2">
4
+ <numFmt numFmtId="59" formatCode="dd/mm/yyyy"/>
4
5
  <numFmt numFmtId="164" formatCode="[$-409]m/d/yy\ h:mm\ AM/PM;@"/>
5
6
  </numFmts>
6
7
  <fonts count="3" x14ac:knownFonts="1">
@@ -50,9 +51,10 @@
50
51
  <xf numFmtId="0" fontId="1" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
51
52
  <xf numFmtId="0" fontId="2" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
52
53
  </cellStyleXfs>
53
- <cellXfs count="3">
54
+ <cellXfs count="4">
54
55
  <xf numFmtId="0" fontId="0" fillId="0" borderId="0" xfId="0"/>
55
56
  <xf numFmtId="164" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
57
+ <xf numFmtId="59" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
56
58
  <xf numFmtId="1" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
57
59
  </cellXfs>
58
60
  <cellStyles count="3">
data/test/test_helper.rb CHANGED
@@ -2,6 +2,7 @@ gem 'minitest'
2
2
  require 'minitest/autorun'
3
3
  require 'minitest/spec'
4
4
  require 'pry'
5
+ require 'time'
5
6
 
6
7
  $:.unshift File.expand_path("lib")
7
8
  require 'simple_xlsx_reader'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-02 00:00:00.000000000 Z
11
+ date: 2022-05-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '5.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: pry
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -68,12 +82,13 @@ dependencies:
68
82
  version: '0'
69
83
  description: Read xlsx data the Ruby way
70
84
  email:
71
- - woody@sigby.com
85
+ - woody.peterson@gmail.com
72
86
  executables: []
73
87
  extensions: []
74
88
  extra_rdoc_files: []
75
89
  files:
76
90
  - ".gitignore"
91
+ - ".travis.yml"
77
92
  - CHANGELOG.md
78
93
  - Gemfile
79
94
  - LICENSE.txt
@@ -86,6 +101,10 @@ files:
86
101
  - test/date1904_test.rb
87
102
  - test/datetime_test.rb
88
103
  - test/datetimes.xlsx
104
+ - test/gdocs_sheet.xlsx
105
+ - test/gdocs_sheet_test.rb
106
+ - test/lower_case_sharedstrings.xlsx
107
+ - test/lower_case_sharedstrings_test.rb
89
108
  - test/performance_test.rb
90
109
  - test/sesame_street_blog.xlsx
91
110
  - test/shared_strings.xml
@@ -93,7 +112,8 @@ files:
93
112
  - test/styles.xml
94
113
  - test/test_helper.rb
95
114
  homepage: ''
96
- licenses: []
115
+ licenses:
116
+ - MIT
97
117
  metadata: {}
98
118
  post_install_message:
99
119
  rdoc_options: []
@@ -110,8 +130,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
110
130
  - !ruby/object:Gem::Version
111
131
  version: '0'
112
132
  requirements: []
113
- rubyforge_project:
114
- rubygems_version: 2.2.0
133
+ rubygems_version: 3.1.6
115
134
  signing_key:
116
135
  specification_version: 4
117
136
  summary: Read xlsx data the Ruby way
@@ -120,6 +139,10 @@ test_files:
120
139
  - test/date1904_test.rb
121
140
  - test/datetime_test.rb
122
141
  - test/datetimes.xlsx
142
+ - test/gdocs_sheet.xlsx
143
+ - test/gdocs_sheet_test.rb
144
+ - test/lower_case_sharedstrings.xlsx
145
+ - test/lower_case_sharedstrings_test.rb
123
146
  - test/performance_test.rb
124
147
  - test/sesame_street_blog.xlsx
125
148
  - test/shared_strings.xml