simple_xlsx_reader 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 94422da0193805c579ba37c7c3e58b35a996dfbc
4
- data.tar.gz: a9c5e1f01acc0c60165a13adc1af087743a60935
3
+ metadata.gz: d3b2fbcc0148d773ed19a266e64877600863730c
4
+ data.tar.gz: 61be9bbca611e49dc7ca725dd33d1560eec98018
5
5
  SHA512:
6
- metadata.gz: 33338f8fcf3c180ea346548061598953842358a21acd6d97bf451c07d8655f179af0cf7b7791f7c9de1a8411578e3623faab178b3cd74893aaf6d040a7abde96
7
- data.tar.gz: 50035b920f6811eed88c318c17b47bf8823aa1ac4bf114af3bc29174edcf08ebd5d16902177aa6a48b70f8e70a745249bb8494101f9f310f24d5f5d5bbc13f27
6
+ metadata.gz: 041cd7eb5b2ebeeb310da52c0e173ff7c04ac95b55ae1c35cc510280c7d10e9ea6af703d10f2bed1f5f5678c77465fdd5807de2d8b3e4ccf7f1a01475178f487
7
+ data.tar.gz: 8587c21a003e2bf4af53b2c67348102d85713e997bc15e248f2b806d4d7388b078bb7a5315c503931a0f287e1a7771d990913878d916d010c36040b6777b98ab
@@ -1,3 +1,11 @@
1
+ ### 1.0.2
2
+
3
+ * Fix Ruby 1.9.3-specific bug that prevented parsing of most sheets [middagj, eritiro]
4
+ * Better support for non-excel-generated xlsx files [bwlang]
5
+ * You don't always have a numFmtId column, and that's OK
6
+ * Sometimes 'sharedStrings.xml' can be 'sharedstrings.xml'
7
+ * Fixed parsing times very close to 12/30/1899 [Valeriy Utyaganov]
8
+
1
9
  ### 1.0.1
2
10
 
3
11
  * Add support for the 1904 date system [zilverline]
@@ -79,8 +79,9 @@ module SimpleXlsxReader
79
79
 
80
80
  # optional feature used by excel, but not often used by xlsx
81
81
  # generation libraries
82
- if zip.file.file?('xl/sharedStrings.xml')
83
- xml.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml')).remove_namespaces!
82
+ ss_file = (zip.to_a.map(&:name) & ['xl/sharedStrings.xml','xl/sharedstrings.xml'])[0]
83
+ if ss_file
84
+ xml.shared_strings = Nokogiri::XML(zip.read(ss_file)).remove_namespaces!
84
85
  end
85
86
 
86
87
  xml.sheets = []
@@ -218,11 +219,13 @@ module SimpleXlsxReader
218
219
  # 'ABA' = 26 * 26 * 1 + 26 * 2 + 1
219
220
  # 'BZA' = 26 * 26 * 2 + 26 * 26 + 1
220
221
  def column_letter_to_number(column_letter)
221
- pow = -1
222
- column_letter.codepoints.reverse.inject(0) do |acc, charcode|
223
- pow += 1
224
- acc + 26**pow * (charcode - 64)
222
+ pow = column_letter.length - 1
223
+ result = 0
224
+ column_letter.each_byte do |b|
225
+ result += 26**pow * (b - 64)
226
+ pow -= 1
225
227
  end
228
+ result
226
229
  end
227
230
 
228
231
  # Excel doesn't record types for some cells, only its display style, so
@@ -241,21 +244,32 @@ module SimpleXlsxReader
241
244
  # type.
242
245
  def style_types
243
246
  @style_types ||=
244
- xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
245
- style_type_by_num_fmt_id(xstyle.attributes['numFmtId'].value)}
247
+ xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
248
+ style_type_by_num_fmt_id(num_fmt_id(xstyle))}
249
+ end
250
+
251
+ # Returns the numFmtId attribute value of the given style node, or nil when the attribute is absent
252
+ def num_fmt_id(xstyle)
253
+ if xstyle.attributes['numFmtId']
254
+ xstyle.attributes['numFmtId'].value
255
+ else
256
+ nil
257
+ end
246
258
  end
247
259
 
248
260
  # Finds the type we think a style is; For example, fmtId 14 is a date
249
- # style, so this would return :date
261
+ # style, so this would return :date.
262
+ #
263
+ # Note, custom styles usually (are supposed to?) have a numFmtId >= 164,
264
+ # but in practice can sometimes be simply out of the usual "Any Language"
265
+ # id range that goes up to 49. For example, I have seen a numFmtId of
266
+ # 59 specified as a date. In Thai, 59 is a number format, so this seems
267
+ # like a bad idea, but we try to be flexible and just go with it.
250
268
  def style_type_by_num_fmt_id(id)
251
269
  return nil if id.nil?
252
270
 
253
271
  id = id.to_i
254
- if id >= 164 # custom style, arg!
255
- custom_style_types[id]
256
- else # we should know this one
257
- NumFmtMap[id]
258
- end
272
+ NumFmtMap[id] || custom_style_types[id]
259
273
  end
260
274
 
261
275
  # Map of (numFmtId >= 164) (custom styles) to our best guess at the type
@@ -347,15 +361,15 @@ module SimpleXlsxReader
347
361
  # the trickiest. note that all these formats can vary on
348
362
  # whether they actually contain a date, time, or datetime.
349
363
  when :date, :time, :date_time
350
- days_since_date_system_start, fraction_of_24 = value.split('.')
364
+ value = value.to_f
365
+ days_since_date_system_start = value.to_i
366
+ fraction_of_24 = value - days_since_date_system_start
351
367
 
352
368
  # http://stackoverflow.com/questions/10559767/how-to-convert-ms-excel-date-from-float-to-date-format-in-ruby
353
- date = options.fetch(:base_date, DATE_SYSTEM_1900) + Integer(days_since_date_system_start)
354
-
355
- if fraction_of_24 # there is a time associated
356
- fraction_of_24 = "0.#{fraction_of_24}".to_f
357
- seconds = (fraction_of_24 * 86400).round
369
+ date = options.fetch(:base_date, DATE_SYSTEM_1900) + days_since_date_system_start
358
370
 
371
+ if fraction_of_24 > 0 # there is a time associated
372
+ seconds = (fraction_of_24 * 86400).round
359
373
  return Time.utc(date.year, date.month, date.day) + seconds
360
374
  else
361
375
  return date
@@ -1,3 +1,3 @@
1
1
  module SimpleXlsxReader
2
- VERSION = "1.0.1"
2
+ VERSION = "1.0.2"
3
3
  end
@@ -1,4 +1,4 @@
1
- require 'test_helper'
1
+ require_relative 'test_helper'
2
2
 
3
3
  describe SimpleXlsxReader do
4
4
  let(:date1904_file) { File.join(File.dirname(__FILE__), 'date1904.xlsx') }
@@ -1,4 +1,4 @@
1
- require 'test_helper'
1
+ require_relative 'test_helper'
2
2
 
3
3
  describe SimpleXlsxReader do
4
4
  let(:datetimes_file) { File.join(File.dirname(__FILE__),
@@ -11,7 +11,8 @@ describe SimpleXlsxReader do
11
11
  "Datetimes" =>
12
12
  [[Time.parse("2013-08-19 18:29:59 UTC")],
13
13
  [Time.parse("2013-08-19 18:30:00 UTC")],
14
- [Time.parse("2013-08-19 18:30:01 UTC")]]
14
+ [Time.parse("2013-08-19 18:30:01 UTC")],
15
+ [Time.parse("1899-12-30 00:30:00 UTC")]]
15
16
  })
16
17
  end
17
18
 
Binary file
@@ -0,0 +1,15 @@
1
+ require_relative 'test_helper'
2
+
3
+ describe SimpleXlsxReader do
4
+ let(:lower_case_shared_strings) { File.join(File.dirname(__FILE__),
5
+ 'lower_case_sharedstrings.xlsx') }
6
+
7
+ let(:subject) { SimpleXlsxReader::Document.new(lower_case_shared_strings) }
8
+
9
+
10
+ describe '#to_hash' do
11
+ it 'should have the word Well in the first row' do
12
+ subject.sheets.first.rows[0].must_include('Well')
13
+ end
14
+ end
15
+ end
@@ -1,4 +1,4 @@
1
- require 'test_helper'
1
+ require_relative 'test_helper'
2
2
  require 'minitest/benchmark'
3
3
 
4
4
  describe 'SimpleXlsxReader Benchmark' do
@@ -1,4 +1,4 @@
1
- require 'test_helper'
1
+ require_relative 'test_helper'
2
2
  require 'time'
3
3
 
4
4
  describe SimpleXlsxReader do
@@ -102,6 +102,13 @@ describe SimpleXlsxReader do
102
102
 
103
103
  it 'reads custom formatted styles (numFmtId >= 164)' do
104
104
  mapper.style_types[1].must_equal :date_time
105
+ mapper.custom_style_types[164].must_equal :date_time
106
+ end
107
+
108
+ # something I've seen in the wild; don't think it's correct, but let's be flexible.
109
+ it 'reads custom formatted styles given an id < 164, but not explicitly defined in the SpreadsheetML spec' do
110
+ mapper.style_types[2].must_equal :date_time
111
+ mapper.custom_style_types[59].must_equal :date_time
105
112
  end
106
113
  end
107
114
 
@@ -258,6 +265,45 @@ describe SimpleXlsxReader do
258
265
  end
259
266
  end
260
267
 
268
+ describe "missing numFmtId attributes" do
269
+
270
+ let(:xml) do
271
+ SimpleXlsxReader::Document::Xml.new.tap do |xml|
272
+ xml.sheets = [Nokogiri::XML(
273
+ <<-XML
274
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
275
+ <dimension ref="A1:A1" />
276
+ <sheetData>
277
+ <row>
278
+ <c r='A1' s='s'>
279
+ <v>some content</v>
280
+ </c>
281
+ </row>
282
+ </sheetData>
283
+ </worksheet>
284
+ XML
285
+ ).remove_namespaces!]
286
+
287
+ xml.styles = Nokogiri::XML(
288
+ <<-XML
289
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
290
+
291
+ </styleSheet>
292
+ XML
293
+ ).remove_namespaces!
294
+ end
295
+ end
296
+
297
+ before do
298
+ @row = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows[0]
299
+ end
300
+
301
+ it 'continues even when cells are missing numFmtId attributes ' do
302
+ @row[0].must_equal 'some content'
303
+ end
304
+
305
+ end
306
+
261
307
  describe 'parsing types' do
262
308
  let(:xml) do
263
309
  SimpleXlsxReader::Document::Xml.new.tap do |xml|
@@ -1,6 +1,7 @@
1
1
  <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2
2
  <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac" mc:Ignorable="x14ac">
3
- <numFmts count="1">
3
+ <numFmts count="2">
4
+ <numFmt numFmtId="59" formatCode="dd/mm/yyyy"/>
4
5
  <numFmt numFmtId="164" formatCode="[$-409]m/d/yy\ h:mm\ AM/PM;@"/>
5
6
  </numFmts>
6
7
  <fonts count="3" x14ac:knownFonts="1">
@@ -50,9 +51,10 @@
50
51
  <xf numFmtId="0" fontId="1" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
51
52
  <xf numFmtId="0" fontId="2" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
52
53
  </cellStyleXfs>
53
- <cellXfs count="3">
54
+ <cellXfs count="4">
54
55
  <xf numFmtId="0" fontId="0" fillId="0" borderId="0" xfId="0"/>
55
56
  <xf numFmtId="164" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
57
+ <xf numFmtId="59" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
56
58
  <xf numFmtId="1" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
57
59
  </cellXfs>
58
60
  <cellStyles count="3">
@@ -2,6 +2,7 @@ gem 'minitest'
2
2
  require 'minitest/autorun'
3
3
  require 'minitest/spec'
4
4
  require 'pry'
5
+ require 'time'
5
6
 
6
7
  $:.unshift File.expand_path("lib")
7
8
  require 'simple_xlsx_reader'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-02 00:00:00.000000000 Z
11
+ date: 2015-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -86,6 +86,8 @@ files:
86
86
  - test/date1904_test.rb
87
87
  - test/datetime_test.rb
88
88
  - test/datetimes.xlsx
89
+ - test/lower_case_sharedstrings.xlsx
90
+ - test/lower_case_sharedstrings_test.rb
89
91
  - test/performance_test.rb
90
92
  - test/sesame_street_blog.xlsx
91
93
  - test/shared_strings.xml
@@ -111,7 +113,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
111
113
  version: '0'
112
114
  requirements: []
113
115
  rubyforge_project:
114
- rubygems_version: 2.2.0
116
+ rubygems_version: 2.2.2
115
117
  signing_key:
116
118
  specification_version: 4
117
119
  summary: Read xlsx data the Ruby way
@@ -120,6 +122,8 @@ test_files:
120
122
  - test/date1904_test.rb
121
123
  - test/datetime_test.rb
122
124
  - test/datetimes.xlsx
125
+ - test/lower_case_sharedstrings.xlsx
126
+ - test/lower_case_sharedstrings_test.rb
123
127
  - test/performance_test.rb
124
128
  - test/sesame_street_blog.xlsx
125
129
  - test/shared_strings.xml