simple_xlsx_reader 1.0.1 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 94422da0193805c579ba37c7c3e58b35a996dfbc
4
- data.tar.gz: a9c5e1f01acc0c60165a13adc1af087743a60935
2
+ SHA256:
3
+ metadata.gz: e2b04473235c5ed2c2764f62a627fa6f16816c36e0fcff3497be229f8666a0f7
4
+ data.tar.gz: 9367b0082f31e9cb208d9f97ed6cb67d5276a459562809460694602339dfdaad
5
5
  SHA512:
6
- metadata.gz: 33338f8fcf3c180ea346548061598953842358a21acd6d97bf451c07d8655f179af0cf7b7791f7c9de1a8411578e3623faab178b3cd74893aaf6d040a7abde96
7
- data.tar.gz: 50035b920f6811eed88c318c17b47bf8823aa1ac4bf114af3bc29174edcf08ebd5d16902177aa6a48b70f8e70a745249bb8494101f9f310f24d5f5d5bbc13f27
6
+ metadata.gz: cd42f7a0b8830a2f01703dca10ae779b973566ad25e3b74d31dc3693977fa5b2b3442e47bc1a3b50723bae3bb9f31facd923f1eaba06b51cc8b927e7fb207cf3
7
+ data.tar.gz: 38ecb026b0ad5a1985d88349a839a9d2972f85596504e6f300686f9751169a3c8d62582e79119106085a9cadc066517206da117993c3a30f48a5a0c58f256b4c
data/.travis.yml ADDED
@@ -0,0 +1,8 @@
1
+ language: ruby
2
+ cache: bundler
3
+ before_install:
4
+ - gem update bundler
5
+ rvm:
6
+ - 2.5.8
7
+ - 2.7.2
8
+ - 3.0.0
data/CHANGELOG.md CHANGED
@@ -1,3 +1,26 @@
1
+ ### 1.0.5
2
+
3
+ * Support string or IO input via `SimpleXlsxReader.parse` (@kalsan, @til)
4
+
5
+ ### 1.0.4
6
+
7
+ * Fix Windows + RubyZip 1.2.1 bug preventing files from being read
8
+ * Add ability to parse hyperlinks
9
+ * Support files exported from Google Docs (@Strnadj)
10
+
11
+ ### 1.0.3
12
+
13
+ Broken on Ruby 1.9; yanked.
14
+
15
+ ### 1.0.2
16
+
17
+ * Fix Ruby 1.9.3-specific bug preventing parsing most sheets [middagj, eritiro]
18
+ * Better support for non-excel-generated xlsx files [bwlang]
19
+ * You don't always have a numFmtId attribute, and that's OK
20
+ * Sometimes 'sharedStrings.xml' can be 'sharedstrings.xml'
21
+ * Fixed parsing times very close to 12/30/1899 [Valeriy Utyaganov]
22
+ * Be more flexible with custom formats using a numFmtId < 164
23
+
1
24
  ### 1.0.1
2
25
 
3
26
  * Add support for the 1904 date system [zilverline]
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # SimpleXlsxReader
1
+ # SimpleXlsxReader [![Build Status](https://travis-ci.org/woahdae/simple_xlsx_reader.svg?branch=master)](https://travis-ci.org/woahdae/simple_xlsx_reader)
2
2
 
3
3
  An xlsx reader for Ruby that parses xlsx cell values into plain ruby
4
4
  primitives and dates/times.
@@ -35,14 +35,21 @@ Here's the totality of the public api, in code:
35
35
 
36
36
  module SimpleXlsxReader
37
37
  def self.open(file_path)
38
- Document.new(file_path).tap(&:sheets)
38
+ Document.new(file_path: file_path).tap(&:sheets)
39
+ end
40
+
41
+ def self.parse(string_or_io)
42
+ Document.new(string_or_io: string_or_io).tap(&:sheets)
39
43
  end
40
44
 
41
45
  class Document
42
- attr_reader :file_path
46
+ attr_reader :string_or_io
47
+
48
+ def initialize(legacy_file_path = nil, file_path: nil, string_or_io: nil)
49
+ ((file_path || legacy_file_path).nil? ^ string_or_io.nil?) ||
50
+ fail(ArgumentError, 'either file_path or string_or_io must be provided')
43
51
 
44
- def initialize(file_path)
45
- @file_path = file_path
52
+ @string_or_io = string_or_io || File.new(file_path || legacy_file_path)
46
53
  end
47
54
 
48
55
  def sheets
@@ -54,7 +61,7 @@ Here's the totality of the public api, in code:
54
61
  end
55
62
 
56
63
  def xml
57
- Xml.load(file_path)
64
+ Xml.load(string_or_io)
58
65
  end
59
66
 
60
67
  class Sheet < Struct.new(:name, :rows)
@@ -1,3 +1,3 @@
1
1
  module SimpleXlsxReader
2
- VERSION = "1.0.1"
2
+ VERSION = "1.0.5"
3
3
  end
@@ -19,6 +19,33 @@ end
19
19
  module SimpleXlsxReader
20
20
  class CellLoadError < StandardError; end
21
21
 
22
+ # We support hyperlinks as a "type" even though they're technically
23
+ # represented either as a function or an external reference in the xlsx spec.
24
+ #
25
+ # Since having hyperlink data in our sheet usually means we might want to do
26
+ # something primarily with the URL (store it in the database, download it, etc),
27
+ # we go through extra effort to parse the function or follow the reference
28
+ # to represent the hyperlink primarily as a URL. However, maybe we do want
29
+ # the hyperlink "friendly name" part (as MS calls it), so here we've subclassed
30
+ # string to tack on the friendly name. This means 80% of us that just want
31
+ # the URL value will have to do nothing extra, but the 20% that might want the
32
+ # friendly name can access it.
33
+ #
34
+ # Note, by default, the value we would get by just asking the cell would
35
+ # be the "friendly name" and *not* the URL, which is tucked away in the
36
+ # function definition or a separate "relationships" meta-document.
37
+ #
38
+ # See MS documentation on the HYPERLINK function for some background:
39
+ # https://support.office.com/en-us/article/HYPERLINK-function-333c7ce6-c5ae-4164-9c47-7de9b76f577f
40
+ class Hyperlink < String
41
+ attr_reader :friendly_name
42
+
43
+ def initialize(url, friendly_name = nil)
44
+ @friendly_name = friendly_name
45
+ super(url)
46
+ end
47
+ end
48
+
22
49
  def self.configuration
23
50
  @configuration ||= Struct.new(:catch_cell_load_errors).new.tap do |c|
24
51
  c.catch_cell_load_errors = false
@@ -26,14 +53,21 @@ module SimpleXlsxReader
26
53
  end
27
54
 
28
55
  def self.open(file_path)
29
- Document.new(file_path).tap(&:sheets)
56
+ Document.new(file_path: file_path).tap(&:sheets)
57
+ end
58
+
59
+ def self.parse(string_or_io)
60
+ Document.new(string_or_io: string_or_io).tap(&:sheets)
30
61
  end
31
62
 
32
63
  class Document
33
- attr_reader :file_path
64
+ attr_reader :string_or_io
65
+
66
+ def initialize(legacy_file_path = nil, file_path: nil, string_or_io: nil)
67
+ ((file_path || legacy_file_path).nil? ^ string_or_io.nil?) ||
68
+ fail(ArgumentError, 'either file_path or string_or_io must be provided')
34
69
 
35
- def initialize(file_path)
36
- @file_path = file_path
70
+ @string_or_io = string_or_io || File.new(file_path || legacy_file_path)
37
71
  end
38
72
 
39
73
  def sheets
@@ -45,7 +79,7 @@ module SimpleXlsxReader
45
79
  end
46
80
 
47
81
  def xml
48
- Xml.load(file_path)
82
+ Xml.load(string_or_io)
49
83
  end
50
84
 
51
85
  class Sheet < Struct.new(:name, :rows)
@@ -69,28 +103,54 @@ module SimpleXlsxReader
69
103
  ##
70
104
  # For internal use; stores source xml in nokogiri documents
71
105
  class Xml
72
- attr_accessor :workbook, :shared_strings, :sheets, :styles
106
+ attr_accessor :workbook, :shared_strings, :sheets, :sheet_rels, :styles
73
107
 
74
- def self.load(file_path)
108
+ def self.load(string_or_io)
75
109
  self.new.tap do |xml|
76
- SimpleXlsxReader::Zip.open(file_path) do |zip|
77
- xml.workbook = Nokogiri::XML(zip.read('xl/workbook.xml')).remove_namespaces!
78
- xml.styles = Nokogiri::XML(zip.read('xl/styles.xml')).remove_namespaces!
79
-
80
- # optional feature used by excel, but not often used by xlsx
81
- # generation libraries
82
- if zip.file.file?('xl/sharedStrings.xml')
83
- xml.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml')).remove_namespaces!
84
- end
85
-
110
+ SimpleXlsxReader::Zip.open_buffer(string_or_io) do |zip|
86
111
  xml.sheets = []
87
- i = 0
88
- loop do
89
- i += 1
90
- break if !zip.file.file?("xl/worksheets/sheet#{i}.xml")
112
+ xml.sheet_rels = []
113
+
114
+ # This weird style of enumerating over the entries lets us
115
+ # concisely assign entries in a case insensitive and
116
+ # slash insensitive ('/' vs '\') manner.
117
+ #
118
+ # RubyZip used to normalize the slashes, but doesn't now:
119
+ # https://github.com/rubyzip/rubyzip/issues/324
120
+ zip.entries.each do |entry|
121
+ if entry.name.match(/^xl.workbook\.xml$/) # xl/workbook.xml
122
+ xml.workbook = Nokogiri::XML(zip.read(entry)).remove_namespaces!
123
+ elsif entry.name.match(/^xl.styles\.xml$/) # xl/styles.xml
124
+ xml.styles = Nokogiri::XML(zip.read(entry)).remove_namespaces!
125
+ elsif entry.name.match(/^xl.sharedStrings\.xml$/i) # xl/sharedStrings.xml
126
+ # optional feature used by excel, but not often used by xlsx
127
+ # generation libraries. Path name is sometimes lowercase, too.
128
+ xml.shared_strings = Nokogiri::XML(zip.read(entry)).remove_namespaces!
129
+ elsif match = entry.name.match(/^xl.worksheets.sheet([0-9]*)\.xml$/)
130
+ sheet_number = match.captures.first.to_i
131
+ xml.sheets[sheet_number] =
132
+ Nokogiri::XML(zip.read(entry)).remove_namespaces!
133
+ elsif match = entry.name.match(/^xl.worksheets._rels.sheet([0-9]*)\.xml\.rels$/)
134
+ sheet_number = match.captures.first.to_i
135
+ xml.sheet_rels[sheet_number] =
136
+ Nokogiri::XML(zip.read(entry)).remove_namespaces!
137
+ end
138
+ end
91
139
 
92
- xml.sheets <<
93
- Nokogiri::XML(zip.read("xl/worksheets/sheet#{i}.xml")).remove_namespaces!
140
+ # Sometimes there's a zero-index sheet.xml, ex.
141
+ # Google Docs creates:
142
+ #
143
+ # xl/worksheets/sheet.xml
144
+ # xl/worksheets/sheet1.xml
145
+ # xl/worksheets/sheet2.xml
146
+ # While Excel creates:
147
+ # xl/worksheets/sheet1.xml
148
+ # xl/worksheets/sheet2.xml
149
+ #
150
+ # So, for the latter case, let's shift [nil, <Sheet 1>, <Sheet 2>]
151
+ if !xml.sheets[0]
152
+ xml.sheets.shift
153
+ xml.sheet_rels.shift
94
154
  end
95
155
  end
96
156
  end
@@ -105,7 +165,7 @@ module SimpleXlsxReader
105
165
 
106
166
  def load_sheets
107
167
  sheet_toc.each_with_index.map do |(sheet_name, _sheet_number), i|
108
- parse_sheet(sheet_name, xml.sheets[i]) # sheet_number is *not* the index into xml.sheets
168
+ parse_sheet(sheet_name, xml.sheets[i], xml.sheet_rels[i]) # sheet_number is *not* the index into xml.sheets
109
169
  end
110
170
  end
111
171
 
@@ -121,9 +181,10 @@ module SimpleXlsxReader
121
181
  end
122
182
  end
123
183
 
124
- def parse_sheet(sheet_name, xsheet)
184
+ def parse_sheet(sheet_name, xsheet, xrels)
125
185
  sheet = Sheet.new(sheet_name)
126
186
  sheet_width, sheet_height = *sheet_dimensions(xsheet)
187
+ cells_w_links = xsheet.xpath('//hyperlinks/hyperlink').inject({}) {|acc, e| acc[e.attr(:ref)] = e.attr(:id); acc}
127
188
 
128
189
  sheet.rows = Array.new(sheet_height) { Array.new(sheet_width) }
129
190
  xsheet.xpath("/worksheet/sheetData/row/c").each do |xcell|
@@ -148,10 +209,21 @@ module SimpleXlsxReader
148
209
  # by about 60%. Odd.
149
210
  xvalue = type == 'inlineStr' ?
150
211
  (xis = xcell.children.find {|c| c.name == 'is'}) && xis.children.find {|c| c.name == 't'} :
151
- xcell.children.find {|c| c.name == 'v'}
212
+ xcell.children.find {|c| c.name == 'f' && c.text.start_with?('HYPERLINK(') || c.name == 'v'}
213
+
214
+ if xvalue
215
+ value = xvalue.text.strip
216
+
217
+ if rel_id = cells_w_links[xcell.attr('r')] # a hyperlink made via GUI
218
+ url = xrels.at_xpath(%(//*[@Id="#{rel_id}"])).attr('Target')
219
+ elsif xvalue.name == 'f' # only time we have a function is if it's a hyperlink
220
+ url = value.slice(/HYPERLINK\("(.*?)"/, 1)
221
+ end
222
+ end
152
223
 
153
224
  cell = begin
154
- self.class.cast(xvalue && xvalue.text.strip, type, style,
225
+ self.class.cast(value, type, style,
226
+ :url => url,
155
227
  :shared_strings => shared_strings,
156
228
  :base_date => base_date)
157
229
  rescue => e
@@ -218,11 +290,13 @@ module SimpleXlsxReader
218
290
  # 'ABA' = 26 * 26 * 1 + 26 * 2 + 1
219
291
  # 'BZA' = 26 * 26 * 2 + 26 * 26 + 1
220
292
  def column_letter_to_number(column_letter)
221
- pow = -1
222
- column_letter.codepoints.reverse.inject(0) do |acc, charcode|
223
- pow += 1
224
- acc + 26**pow * (charcode - 64)
293
+ pow = column_letter.length - 1
294
+ result = 0
295
+ column_letter.each_byte do |b|
296
+ result += 26**pow * (b - 64)
297
+ pow -= 1
225
298
  end
299
+ result
226
300
  end
227
301
 
228
302
  # Excel doesn't record types for some cells, only its display style, so
@@ -241,21 +315,32 @@ module SimpleXlsxReader
241
315
  # type.
242
316
  def style_types
243
317
  @style_types ||=
244
- xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
245
- style_type_by_num_fmt_id(xstyle.attributes['numFmtId'].value)}
318
+ xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
319
+ style_type_by_num_fmt_id(num_fmt_id(xstyle))}
320
+ end
321
+
322
+ # Returns the numFmtId value if it's available, or nil otherwise
323
+ def num_fmt_id(xstyle)
324
+ if xstyle.attributes['numFmtId']
325
+ xstyle.attributes['numFmtId'].value
326
+ else
327
+ nil
328
+ end
246
329
  end
247
330
 
248
331
  # Finds the type we think a style is; For example, fmtId 14 is a date
249
- # style, so this would return :date
332
+ # style, so this would return :date.
333
+ #
334
+ # Note, custom styles usually (are supposed to?) have a numFmtId >= 164,
335
+ # but in practice can sometimes be simply out of the usual "Any Language"
336
+ # id range that goes up to 49. For example, I have seen a numFmtId of
337
+ # 59 specified as a date. In Thai, 59 is a number format, so this seems
338
+ # like a bad idea, but we try to be flexible and just go with it.
250
339
  def style_type_by_num_fmt_id(id)
251
340
  return nil if id.nil?
252
341
 
253
342
  id = id.to_i
254
- if id >= 164 # custom style, arg!
255
- custom_style_types[id]
256
- else # we should know this one
257
- NumFmtMap[id]
258
- end
343
+ NumFmtMap[id] || custom_style_types[id]
259
344
  end
260
345
 
261
346
  # Map of (numFmtId >= 164) (custom styles) to our best guess at the type
@@ -314,7 +399,7 @@ module SimpleXlsxReader
314
399
  type = style
315
400
  end
316
401
 
317
- case type
402
+ casted = case type
318
403
 
319
404
  ##
320
405
  # There are few built-in types
@@ -347,15 +432,15 @@ module SimpleXlsxReader
347
432
  # the trickiest. note that all these formats can vary on
348
433
  # whether they actually contain a date, time, or datetime.
349
434
  when :date, :time, :date_time
350
- days_since_date_system_start, fraction_of_24 = value.split('.')
435
+ value = Float(value)
436
+ days_since_date_system_start = value.to_i
437
+ fraction_of_24 = value - days_since_date_system_start
351
438
 
352
439
  # http://stackoverflow.com/questions/10559767/how-to-convert-ms-excel-date-from-float-to-date-format-in-ruby
353
- date = options.fetch(:base_date, DATE_SYSTEM_1900) + Integer(days_since_date_system_start)
354
-
355
- if fraction_of_24 # there is a time associated
356
- fraction_of_24 = "0.#{fraction_of_24}".to_f
357
- seconds = (fraction_of_24 * 86400).round
440
+ date = options.fetch(:base_date, DATE_SYSTEM_1900) + days_since_date_system_start
358
441
 
442
+ if fraction_of_24 > 0 # there is a time associated
443
+ seconds = (fraction_of_24 * 86400).round
359
444
  return Time.utc(date.year, date.month, date.day) + seconds
360
445
  else
361
446
  return date
@@ -374,6 +459,12 @@ module SimpleXlsxReader
374
459
  else
375
460
  value
376
461
  end
462
+
463
+ if options[:url]
464
+ Hyperlink.new(options[:url], casted)
465
+ else
466
+ casted
467
+ end
377
468
  end
378
469
 
379
470
  ## Returns the base_date from which to calculate dates.
@@ -7,19 +7,21 @@ Gem::Specification.new do |gem|
7
7
  gem.name = "simple_xlsx_reader"
8
8
  gem.version = SimpleXlsxReader::VERSION
9
9
  gem.authors = ["Woody Peterson"]
10
- gem.email = ["woody@sigby.com"]
10
+ gem.email = ["woody.peterson@gmail.com"]
11
11
  gem.description = %q{Read xlsx data the Ruby way}
12
12
  gem.summary = %q{Read xlsx data the Ruby way}
13
13
  gem.homepage = ""
14
+ gem.license = "MIT"
14
15
 
15
16
  gem.add_dependency 'nokogiri'
16
17
  gem.add_dependency 'rubyzip'
17
18
 
18
19
  gem.add_development_dependency 'minitest', '>= 5.0'
20
+ gem.add_development_dependency 'rake'
19
21
  gem.add_development_dependency 'pry'
20
22
 
21
23
  gem.files = `git ls-files`.split($/)
22
24
  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
23
- gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
25
+ gem.test_files = gem.files.grep(%r{^test/})
24
26
  gem.require_paths = ["lib"]
25
27
  end
@@ -1,4 +1,4 @@
1
- require 'test_helper'
1
+ require_relative 'test_helper'
2
2
 
3
3
  describe SimpleXlsxReader do
4
4
  let(:date1904_file) { File.join(File.dirname(__FILE__), 'date1904.xlsx') }
@@ -1,4 +1,4 @@
1
- require 'test_helper'
1
+ require_relative 'test_helper'
2
2
 
3
3
  describe SimpleXlsxReader do
4
4
  let(:datetimes_file) { File.join(File.dirname(__FILE__),
@@ -11,7 +11,8 @@ describe SimpleXlsxReader do
11
11
  "Datetimes" =>
12
12
  [[Time.parse("2013-08-19 18:29:59 UTC")],
13
13
  [Time.parse("2013-08-19 18:30:00 UTC")],
14
- [Time.parse("2013-08-19 18:30:01 UTC")]]
14
+ [Time.parse("2013-08-19 18:30:01 UTC")],
15
+ [Time.parse("1899-12-30 00:30:00 UTC")]]
15
16
  })
16
17
  end
17
18
 
data/test/datetimes.xlsx CHANGED
Binary file
Binary file
@@ -0,0 +1,15 @@
1
+ require_relative 'test_helper'
2
+ require 'time'
3
+
4
+ describe SimpleXlsxReader do
5
+ let(:one_sheet_file) { File.join(File.dirname(__FILE__), 'gdocs_sheet.xlsx') }
6
+ let(:subject) { SimpleXlsxReader::Document.new(one_sheet_file) }
7
+
8
+ it 'able to load file from google docs' do
9
+ subject.to_hash.must_equal({
10
+ "List 1" => [["Empty gdocs list 1"]],
11
+ "List 2" => [["Empty gdocs list 2"]]
12
+ })
13
+ end
14
+
15
+ end
Binary file
@@ -0,0 +1,15 @@
1
+ require_relative 'test_helper'
2
+
3
+ describe SimpleXlsxReader do
4
+ let(:lower_case_shared_strings) { File.join(File.dirname(__FILE__),
5
+ 'lower_case_sharedstrings.xlsx') }
6
+
7
+ let(:subject) { SimpleXlsxReader::Document.new(lower_case_shared_strings) }
8
+
9
+
10
+ describe '#to_hash' do
11
+ it 'should have the word Well in the first row' do
12
+ subject.sheets.first.rows[0].must_include('Well')
13
+ end
14
+ end
15
+ end
@@ -1,4 +1,4 @@
1
- require 'test_helper'
1
+ require_relative 'test_helper'
2
2
  require 'minitest/benchmark'
3
3
 
4
4
  describe 'SimpleXlsxReader Benchmark' do
@@ -96,13 +96,13 @@ describe 'SimpleXlsxReader Benchmark' do
96
96
  bench_exp(1,10000)
97
97
  end
98
98
 
99
- bench_performance_linear 'parses sheets in linear time', 0.9999 do |n|
99
+ bench_performance_linear 'parses sheets in linear time', 0.999 do |n|
100
100
 
101
101
  raise "not enough sample data; asked for #{n}, only have #{@xml.sheets.size}"\
102
102
  if @xml.sheets[n].nil?
103
103
 
104
104
  sheet = SimpleXlsxReader::Document::Mapper.new(@xml).
105
- parse_sheet('test', @xml.sheets[n])
105
+ parse_sheet('test', @xml.sheets[n], nil)
106
106
 
107
107
  raise "sheet didn't parse correctly; expected #{n + 1} rows, got #{sheet.rows.size}"\
108
108
  if sheet.rows.size != n + 1
Binary file
@@ -1,26 +1,66 @@
1
- require 'test_helper'
1
+ require_relative 'test_helper'
2
2
  require 'time'
3
3
 
4
+ SXR = SimpleXlsxReader
5
+
4
6
  describe SimpleXlsxReader do
5
- let(:sesame_street_blog_file) { File.join(File.dirname(__FILE__),
6
- 'sesame_street_blog.xlsx') }
7
-
8
- let(:subject) { SimpleXlsxReader::Document.new(sesame_street_blog_file) }
9
-
10
- describe '#to_hash' do
11
- it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
12
- subject.to_hash.must_equal({
13
- "Authors"=>
14
- [["Name", "Occupation"],
15
- ["Big Bird", "Teacher"]],
16
-
17
- "Posts"=>
18
- [["Author Name", "Title", "Body", "Created At", "Comment Count"],
19
- ["Big Bird", "The Number 1", "The Greatest", Time.parse("2002-01-01 11:00:00 UTC"), 1],
20
- ["Big Bird", "The Number 2", "Second Best", Time.parse("2002-01-02 14:00:00 UTC"), 2],
21
- ["Big Bird", "Formula Dates", "Tricky tricky", Time.parse("2002-01-03 14:00:00 UTC"), 0],
22
- ["Empty Eagress", nil, "The title, date, and comment have types, but no values", nil, nil]]
23
- })
7
+ let(:sesame_street_blog_file_path) { File.join(File.dirname(__FILE__), 'sesame_street_blog.xlsx') }
8
+ let(:sesame_street_blog_io) { File.new(sesame_street_blog_file_path) }
9
+ let(:expected_result) do
10
+ {
11
+ "Authors"=>
12
+ [["Name", "Occupation"],
13
+ ["Big Bird", "Teacher"]],
14
+ "Posts"=>
15
+ [["Author Name", "Title", "Body", "Created At", "Comment Count", "URL"],
16
+ ["Big Bird", "The Number 1", "The Greatest", Time.parse("2002-01-01 11:00:00 UTC"), 1, SXR::Hyperlink.new("http://www.example.com/hyperlink-function", "This uses the HYPERLINK() function")],
17
+ ["Big Bird", "The Number 2", "Second Best", Time.parse("2002-01-02 14:00:00 UTC"), 2, SXR::Hyperlink.new("http://www.example.com/hyperlink-gui", "This uses the hyperlink GUI option")],
18
+ ["Big Bird", "Formula Dates", "Tricky tricky", Time.parse("2002-01-03 14:00:00 UTC"), 0, nil],
19
+ ["Empty Eagress", nil, "The title, date, and comment have types, but no values", nil, nil, nil]]
20
+ }
21
+ end
22
+
23
+ describe SimpleXlsxReader do
24
+ describe 'load from file path' do
25
+ let(:subject) { SimpleXlsxReader.open(sesame_street_blog_file_path) }
26
+
27
+ it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
28
+ subject.to_hash.must_equal(expected_result)
29
+ end
30
+ end
31
+
32
+ describe 'load from buffer' do
33
+ let(:subject) { SimpleXlsxReader.parse(sesame_street_blog_io) }
34
+
35
+ it 'reads an xlsx buffer into a hash of {[sheet name] => [data]}' do
36
+ subject.to_hash.must_equal(expected_result)
37
+ end
38
+ end
39
+ end
40
+
41
+ describe SimpleXlsxReader::Document do
42
+ describe 'load from file path' do
43
+ let(:subject) { SimpleXlsxReader::Document.new(file_path: sesame_street_blog_file_path) }
44
+
45
+ it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
46
+ subject.to_hash.must_equal(expected_result)
47
+ end
48
+ end
49
+
50
+ describe 'load from buffer' do
51
+ let(:subject) { SimpleXlsxReader::Document.new(string_or_io: sesame_street_blog_io) }
52
+
53
+ it 'reads an xlsx buffer into a hash of {[sheet name] => [data]}' do
54
+ subject.to_hash.must_equal(expected_result)
55
+ end
56
+ end
57
+
58
+ describe 'load from file path (legacy API)' do
59
+ let(:subject) { SimpleXlsxReader::Document.new(sesame_street_blog_file_path) }
60
+
61
+ it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
62
+ subject.to_hash.must_equal(expected_result)
63
+ end
24
64
  end
25
65
  end
26
66
 
@@ -63,10 +103,33 @@ describe SimpleXlsxReader do
63
103
  must_equal Time.parse('2013-08-19 18:30 UTC')
64
104
  end
65
105
 
106
+ it 'reads less-than-zero complex number types styled as times' do
107
+ described_class.cast('6.25E-2', 'n', :time).
108
+ must_equal Time.parse('1899-12-30 01:30:00 UTC')
109
+ end
110
+
66
111
  it 'reads number types styled as date_times' do
67
112
  described_class.cast('41505.77083', 'n', :date_time).
68
113
  must_equal Time.parse('2013-08-19 18:30 UTC')
69
114
  end
115
+
116
+ it 'raises when date-styled values are not numerical' do
117
+ lambda { described_class.cast('14 is not a valid date', nil, :date) }.
118
+ must_raise(ArgumentError)
119
+ end
120
+
121
+ describe "with the url option" do
122
+ let(:url) { "http://www.example.com/hyperlink" }
123
+ it 'creates a hyperlink with a string type' do
124
+ described_class.cast("A link", 'str', :string, url: url).
125
+ must_equal SXR::Hyperlink.new(url, "A link")
126
+ end
127
+
128
+ it 'creates a hyperlink with a shared string type' do
129
+ described_class.cast("2", 's', nil, shared_strings: ['a','b','c'], url: url).
130
+ must_equal SXR::Hyperlink.new(url, 'c')
131
+ end
132
+ end
70
133
  end
71
134
 
72
135
  describe '#shared_strings' do
@@ -102,6 +165,13 @@ describe SimpleXlsxReader do
102
165
 
103
166
  it 'reads custom formatted styles (numFmtId >= 164)' do
104
167
  mapper.style_types[1].must_equal :date_time
168
+ mapper.custom_style_types[164].must_equal :date_time
169
+ end
170
+
171
+ # something I've seen in the wild; don't think it's correct, but let's be flexible.
172
+ it 'reads custom formatted styles given an id < 164, but not explicitly defined in the SpreadsheetML spec' do
173
+ mapper.style_types[2].must_equal :date_time
174
+ mapper.custom_style_types[59].must_equal :date_time
105
175
  end
106
176
  end
107
177
 
@@ -246,16 +316,55 @@ describe SimpleXlsxReader do
246
316
  it 'raises if configuration.catch_cell_load_errors' do
247
317
  SimpleXlsxReader.configuration.catch_cell_load_errors = false
248
318
 
249
- lambda { described_class.new(xml).parse_sheet('test', xml.sheets.first) }.
319
+ lambda { described_class.new(xml).parse_sheet('test', xml.sheets.first, nil) }.
250
320
  must_raise(SimpleXlsxReader::CellLoadError)
251
321
  end
252
322
 
253
323
  it 'records a load error if not configuration.catch_cell_load_errors' do
254
324
  SimpleXlsxReader.configuration.catch_cell_load_errors = true
255
325
 
256
- sheet = described_class.new(xml).parse_sheet('test', xml.sheets.first)
257
- sheet.load_errors[[0,0]].must_include 'invalid value for Integer'
326
+ sheet = described_class.new(xml).parse_sheet('test', xml.sheets.first, nil)
327
+ sheet.load_errors[[0,0]].must_include 'invalid value for Float'
328
+ end
329
+ end
330
+
331
+ describe "missing numFmtId attributes" do
332
+
333
+ let(:xml) do
334
+ SimpleXlsxReader::Document::Xml.new.tap do |xml|
335
+ xml.sheets = [Nokogiri::XML(
336
+ <<-XML
337
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
338
+ <dimension ref="A1:A1" />
339
+ <sheetData>
340
+ <row>
341
+ <c r='A1' s='s'>
342
+ <v>some content</v>
343
+ </c>
344
+ </row>
345
+ </sheetData>
346
+ </worksheet>
347
+ XML
348
+ ).remove_namespaces!]
349
+
350
+ xml.styles = Nokogiri::XML(
351
+ <<-XML
352
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
353
+
354
+ </styleSheet>
355
+ XML
356
+ ).remove_namespaces!
357
+ end
358
+ end
359
+
360
+ before do
361
+ @row = described_class.new(xml).parse_sheet('test', xml.sheets.first, nil).rows[0]
258
362
  end
363
+
364
+ it 'continues even when cells are missing numFmtId attributes ' do
365
+ @row[0].must_equal 'some content'
366
+ end
367
+
259
368
  end
260
369
 
261
370
  describe 'parsing types' do
@@ -284,8 +393,21 @@ describe SimpleXlsxReader do
284
393
  <c r='G1' t='inlineStr' s='0'>
285
394
  <is><t>Cell G1</t></is>
286
395
  </c>
396
+
397
+ <c r='H1' s='0'>
398
+ <f>HYPERLINK("http://www.example.com/hyperlink-function", "HYPERLINK function")</f>
399
+ <v>HYPERLINK function</v>
400
+ </c>
401
+
402
+ <c r='I1' s='0'>
403
+ <v>GUI-made hyperlink</v>
404
+ </c>
287
405
  </row>
288
406
  </sheetData>
407
+
408
+ <hyperlinks>
409
+ <hyperlink ref="I1" id="rId1"/>
410
+ </hyperlinks>
289
411
  </worksheet>
290
412
  XML
291
413
  ).remove_namespaces!]
@@ -303,11 +425,28 @@ describe SimpleXlsxReader do
303
425
  </styleSheet>
304
426
  XML
305
427
  ).remove_namespaces!
428
+
429
+ # Although not a "type" or "style" according to xlsx spec,
430
+ # it sure could/should be, so let's test it with the rest of our
431
+ # typecasting code.
432
+ xml.sheet_rels = [Nokogiri::XML(
433
+ <<-XML
434
+ <Relationships>
435
+ <Relationship
436
+ Id="rId1"
437
+ Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
438
+ Target="http://www.example.com/hyperlink-gui"
439
+ TargetMode="External"
440
+ />
441
+ </Relationships>
442
+ XML
443
+ ).remove_namespaces!]
444
+
306
445
  end
307
446
  end
308
447
 
309
448
  before do
310
- @row = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows[0]
449
+ @row = described_class.new(xml).parse_sheet('test', xml.sheets.first, xml.sheet_rels.first).rows[0]
311
450
  end
312
451
 
313
452
  it "reads 'Generic' cells as strings" do
@@ -341,6 +480,18 @@ describe SimpleXlsxReader do
341
480
  it "reads strings formatted as inlineStr" do
342
481
  @row[6].must_equal 'Cell G1'
343
482
  end
483
+
484
+ it "reads hyperlinks created via HYPERLINK()" do
485
+ @row[7].must_equal(
486
+ SXR::Hyperlink.new(
487
+ "http://www.example.com/hyperlink-function", "HYPERLINK function"))
488
+ end
489
+
490
+ it "reads hyperlinks created via the GUI" do
491
+ @row[8].must_equal(
492
+ SXR::Hyperlink.new(
493
+ "http://www.example.com/hyperlink-gui", "GUI-made hyperlink"))
494
+ end
344
495
  end
345
496
 
346
497
  describe 'parsing documents with blank rows' do
@@ -389,7 +540,7 @@ describe SimpleXlsxReader do
389
540
  end
390
541
 
391
542
  before do
392
- @rows = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows
543
+ @rows = described_class.new(xml).parse_sheet('test', xml.sheets.first, nil).rows
393
544
  end
394
545
 
395
546
  it "reads row data despite gaps in row numbering" do
data/test/styles.xml CHANGED
@@ -1,6 +1,7 @@
1
1
  <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2
2
  <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac" mc:Ignorable="x14ac">
3
- <numFmts count="1">
3
+ <numFmts count="2">
4
+ <numFmt numFmtId="59" formatCode="dd/mm/yyyy"/>
4
5
  <numFmt numFmtId="164" formatCode="[$-409]m/d/yy\ h:mm\ AM/PM;@"/>
5
6
  </numFmts>
6
7
  <fonts count="3" x14ac:knownFonts="1">
@@ -50,9 +51,10 @@
50
51
  <xf numFmtId="0" fontId="1" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
51
52
  <xf numFmtId="0" fontId="2" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
52
53
  </cellStyleXfs>
53
- <cellXfs count="3">
54
+ <cellXfs count="4">
54
55
  <xf numFmtId="0" fontId="0" fillId="0" borderId="0" xfId="0"/>
55
56
  <xf numFmtId="164" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
57
+ <xf numFmtId="59" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
56
58
  <xf numFmtId="1" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
57
59
  </cellXfs>
58
60
  <cellStyles count="3">
data/test/test_helper.rb CHANGED
@@ -2,6 +2,7 @@ gem 'minitest'
2
2
  require 'minitest/autorun'
3
3
  require 'minitest/spec'
4
4
  require 'pry'
5
+ require 'time'
5
6
 
6
7
  $:.unshift File.expand_path("lib")
7
8
  require 'simple_xlsx_reader'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-02 00:00:00.000000000 Z
11
+ date: 2022-05-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '5.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: pry
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -68,12 +82,13 @@ dependencies:
68
82
  version: '0'
69
83
  description: Read xlsx data the Ruby way
70
84
  email:
71
- - woody@sigby.com
85
+ - woody.peterson@gmail.com
72
86
  executables: []
73
87
  extensions: []
74
88
  extra_rdoc_files: []
75
89
  files:
76
90
  - ".gitignore"
91
+ - ".travis.yml"
77
92
  - CHANGELOG.md
78
93
  - Gemfile
79
94
  - LICENSE.txt
@@ -86,6 +101,10 @@ files:
86
101
  - test/date1904_test.rb
87
102
  - test/datetime_test.rb
88
103
  - test/datetimes.xlsx
104
+ - test/gdocs_sheet.xlsx
105
+ - test/gdocs_sheet_test.rb
106
+ - test/lower_case_sharedstrings.xlsx
107
+ - test/lower_case_sharedstrings_test.rb
89
108
  - test/performance_test.rb
90
109
  - test/sesame_street_blog.xlsx
91
110
  - test/shared_strings.xml
@@ -93,7 +112,8 @@ files:
93
112
  - test/styles.xml
94
113
  - test/test_helper.rb
95
114
  homepage: ''
96
- licenses: []
115
+ licenses:
116
+ - MIT
97
117
  metadata: {}
98
118
  post_install_message:
99
119
  rdoc_options: []
@@ -110,8 +130,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
110
130
  - !ruby/object:Gem::Version
111
131
  version: '0'
112
132
  requirements: []
113
- rubyforge_project:
114
- rubygems_version: 2.2.0
133
+ rubygems_version: 3.1.6
115
134
  signing_key:
116
135
  specification_version: 4
117
136
  summary: Read xlsx data the Ruby way
@@ -120,6 +139,10 @@ test_files:
120
139
  - test/date1904_test.rb
121
140
  - test/datetime_test.rb
122
141
  - test/datetimes.xlsx
142
+ - test/gdocs_sheet.xlsx
143
+ - test/gdocs_sheet_test.rb
144
+ - test/lower_case_sharedstrings.xlsx
145
+ - test/lower_case_sharedstrings_test.rb
123
146
  - test/performance_test.rb
124
147
  - test/sesame_street_blog.xlsx
125
148
  - test/shared_strings.xml