simple_xlsx_reader 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 94422da0193805c579ba37c7c3e58b35a996dfbc
4
- data.tar.gz: a9c5e1f01acc0c60165a13adc1af087743a60935
3
+ metadata.gz: d3b2fbcc0148d773ed19a266e64877600863730c
4
+ data.tar.gz: 61be9bbca611e49dc7ca725dd33d1560eec98018
5
5
  SHA512:
6
- metadata.gz: 33338f8fcf3c180ea346548061598953842358a21acd6d97bf451c07d8655f179af0cf7b7791f7c9de1a8411578e3623faab178b3cd74893aaf6d040a7abde96
7
- data.tar.gz: 50035b920f6811eed88c318c17b47bf8823aa1ac4bf114af3bc29174edcf08ebd5d16902177aa6a48b70f8e70a745249bb8494101f9f310f24d5f5d5bbc13f27
6
+ metadata.gz: 041cd7eb5b2ebeeb310da52c0e173ff7c04ac95b55ae1c35cc510280c7d10e9ea6af703d10f2bed1f5f5678c77465fdd5807de2d8b3e4ccf7f1a01475178f487
7
+ data.tar.gz: 8587c21a003e2bf4af53b2c67348102d85713e997bc15e248f2b806d4d7388b078bb7a5315c503931a0f287e1a7771d990913878d916d010c36040b6777b98ab
@@ -1,3 +1,11 @@
1
+ ### 1.0.2
2
+
3
+ * Fix Ruby 1.9.3-specific bug that prevented parsing of most sheets [middagj, eritiro]
4
+ * Better support for non-Excel-generated xlsx files [bwlang]
5
+ * You don't always have a numFmtId column, and that's OK
6
+ * Sometimes 'sharedStrings.xml' can be 'sharedstrings.xml'
7
+ * Fixed parsing times very close to 12/30/1899 [Valeriy Utyaganov]
8
+
1
9
  ### 1.0.1
2
10
 
3
11
  * Add support for the 1904 date system [zilverline]
@@ -79,8 +79,9 @@ module SimpleXlsxReader
79
79
 
80
80
  # optional feature used by excel, but not often used by xlsx
81
81
  # generation libraries
82
- if zip.file.file?('xl/sharedStrings.xml')
83
- xml.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml')).remove_namespaces!
82
+ ss_file = (zip.to_a.map(&:name) & ['xl/sharedStrings.xml','xl/sharedstrings.xml'])[0]
83
+ if ss_file
84
+ xml.shared_strings = Nokogiri::XML(zip.read(ss_file)).remove_namespaces!
84
85
  end
85
86
 
86
87
  xml.sheets = []
@@ -218,11 +219,13 @@ module SimpleXlsxReader
218
219
  # 'ABA' = 26 * 26 * 1 + 26 * 2 + 1
219
220
  # 'BZA' = 26 * 26 * 2 + 26 * 26 + 1
220
221
  def column_letter_to_number(column_letter)
221
- pow = -1
222
- column_letter.codepoints.reverse.inject(0) do |acc, charcode|
223
- pow += 1
224
- acc + 26**pow * (charcode - 64)
222
+ pow = column_letter.length - 1
223
+ result = 0
224
+ column_letter.each_byte do |b|
225
+ result += 26**pow * (b - 64)
226
+ pow -= 1
225
227
  end
228
+ result
226
229
  end
227
230
 
228
231
  # Excel doesn't record types for some cells, only its display style, so
@@ -241,21 +244,32 @@ module SimpleXlsxReader
241
244
  # type.
242
245
  def style_types
243
246
  @style_types ||=
244
- xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
245
- style_type_by_num_fmt_id(xstyle.attributes['numFmtId'].value)}
247
+ xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
248
+ style_type_by_num_fmt_id(num_fmt_id(xstyle))}
249
+ end
250
+
251
+ # Returns the numFmtId value if it's available
252
+ def num_fmt_id(xstyle)
253
+ if xstyle.attributes['numFmtId']
254
+ xstyle.attributes['numFmtId'].value
255
+ else
256
+ nil
257
+ end
246
258
  end
247
259
 
248
260
  # Finds the type we think a style is; For example, fmtId 14 is a date
249
- # style, so this would return :date
261
+ # style, so this would return :date.
262
+ #
263
+ # Note, custom styles usually (are supposed to?) have a numFmtId >= 164,
264
+ # but in practice can sometimes be simply out of the usual "Any Language"
265
+ # id range that goes up to 49. For example, I have seen a numFmtId of
266
+ # 59 specified as a date. In Thai, 59 is a number format, so this seems
267
+ # like a bad idea, but we try to be flexible and just go with it.
250
268
  def style_type_by_num_fmt_id(id)
251
269
  return nil if id.nil?
252
270
 
253
271
  id = id.to_i
254
- if id >= 164 # custom style, arg!
255
- custom_style_types[id]
256
- else # we should know this one
257
- NumFmtMap[id]
258
- end
272
+ NumFmtMap[id] || custom_style_types[id]
259
273
  end
260
274
 
261
275
  # Map of (numFmtId >= 164) (custom styles) to our best guess at the type
@@ -347,15 +361,15 @@ module SimpleXlsxReader
347
361
  # the trickiest. note that all these formats can vary on
348
362
  # whether they actually contain a date, time, or datetime.
349
363
  when :date, :time, :date_time
350
- days_since_date_system_start, fraction_of_24 = value.split('.')
364
+ value = value.to_f
365
+ days_since_date_system_start = value.to_i
366
+ fraction_of_24 = value - days_since_date_system_start
351
367
 
352
368
  # http://stackoverflow.com/questions/10559767/how-to-convert-ms-excel-date-from-float-to-date-format-in-ruby
353
- date = options.fetch(:base_date, DATE_SYSTEM_1900) + Integer(days_since_date_system_start)
354
-
355
- if fraction_of_24 # there is a time associated
356
- fraction_of_24 = "0.#{fraction_of_24}".to_f
357
- seconds = (fraction_of_24 * 86400).round
369
+ date = options.fetch(:base_date, DATE_SYSTEM_1900) + days_since_date_system_start
358
370
 
371
+ if fraction_of_24 > 0 # there is a time associated
372
+ seconds = (fraction_of_24 * 86400).round
359
373
  return Time.utc(date.year, date.month, date.day) + seconds
360
374
  else
361
375
  return date
@@ -1,3 +1,3 @@
1
1
  module SimpleXlsxReader
2
- VERSION = "1.0.1"
2
+ VERSION = "1.0.2"
3
3
  end
@@ -1,4 +1,4 @@
1
- require 'test_helper'
1
+ require_relative 'test_helper'
2
2
 
3
3
  describe SimpleXlsxReader do
4
4
  let(:date1904_file) { File.join(File.dirname(__FILE__), 'date1904.xlsx') }
@@ -1,4 +1,4 @@
1
- require 'test_helper'
1
+ require_relative 'test_helper'
2
2
 
3
3
  describe SimpleXlsxReader do
4
4
  let(:datetimes_file) { File.join(File.dirname(__FILE__),
@@ -11,7 +11,8 @@ describe SimpleXlsxReader do
11
11
  "Datetimes" =>
12
12
  [[Time.parse("2013-08-19 18:29:59 UTC")],
13
13
  [Time.parse("2013-08-19 18:30:00 UTC")],
14
- [Time.parse("2013-08-19 18:30:01 UTC")]]
14
+ [Time.parse("2013-08-19 18:30:01 UTC")],
15
+ [Time.parse("1899-12-30 00:30:00 UTC")]]
15
16
  })
16
17
  end
17
18
 
Binary file
@@ -0,0 +1,15 @@
1
+ require_relative 'test_helper'
2
+
3
+ describe SimpleXlsxReader do
4
+ let(:lower_case_shared_strings) { File.join(File.dirname(__FILE__),
5
+ 'lower_case_sharedstrings.xlsx') }
6
+
7
+ let(:subject) { SimpleXlsxReader::Document.new(lower_case_shared_strings) }
8
+
9
+
10
+ describe '#to_hash' do
11
+ it 'should have the word Well in the first row' do
12
+ subject.sheets.first.rows[0].must_include('Well')
13
+ end
14
+ end
15
+ end
@@ -1,4 +1,4 @@
1
- require 'test_helper'
1
+ require_relative 'test_helper'
2
2
  require 'minitest/benchmark'
3
3
 
4
4
  describe 'SimpleXlsxReader Benchmark' do
@@ -1,4 +1,4 @@
1
- require 'test_helper'
1
+ require_relative 'test_helper'
2
2
  require 'time'
3
3
 
4
4
  describe SimpleXlsxReader do
@@ -102,6 +102,13 @@ describe SimpleXlsxReader do
102
102
 
103
103
  it 'reads custom formatted styles (numFmtId >= 164)' do
104
104
  mapper.style_types[1].must_equal :date_time
105
+ mapper.custom_style_types[164].must_equal :date_time
106
+ end
107
+
108
+ # something I've seen in the wild; don't think it's correct, but let's be flexible.
109
+ it 'reads custom formatted styles given an id < 164, but not explicitly defined in the SpreadsheetML spec' do
110
+ mapper.style_types[2].must_equal :date_time
111
+ mapper.custom_style_types[59].must_equal :date_time
105
112
  end
106
113
  end
107
114
 
@@ -258,6 +265,45 @@ describe SimpleXlsxReader do
258
265
  end
259
266
  end
260
267
 
268
+ describe "missing numFmtId attributes" do
269
+
270
+ let(:xml) do
271
+ SimpleXlsxReader::Document::Xml.new.tap do |xml|
272
+ xml.sheets = [Nokogiri::XML(
273
+ <<-XML
274
+ <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
275
+ <dimension ref="A1:A1" />
276
+ <sheetData>
277
+ <row>
278
+ <c r='A1' s='s'>
279
+ <v>some content</v>
280
+ </c>
281
+ </row>
282
+ </sheetData>
283
+ </worksheet>
284
+ XML
285
+ ).remove_namespaces!]
286
+
287
+ xml.styles = Nokogiri::XML(
288
+ <<-XML
289
+ <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
290
+
291
+ </styleSheet>
292
+ XML
293
+ ).remove_namespaces!
294
+ end
295
+ end
296
+
297
+ before do
298
+ @row = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows[0]
299
+ end
300
+
301
+ it 'continues even when cells are missing numFmtId attributes ' do
302
+ @row[0].must_equal 'some content'
303
+ end
304
+
305
+ end
306
+
261
307
  describe 'parsing types' do
262
308
  let(:xml) do
263
309
  SimpleXlsxReader::Document::Xml.new.tap do |xml|
@@ -1,6 +1,7 @@
1
1
  <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
2
2
  <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac" mc:Ignorable="x14ac">
3
- <numFmts count="1">
3
+ <numFmts count="2">
4
+ <numFmt numFmtId="59" formatCode="dd/mm/yyyy"/>
4
5
  <numFmt numFmtId="164" formatCode="[$-409]m/d/yy\ h:mm\ AM/PM;@"/>
5
6
  </numFmts>
6
7
  <fonts count="3" x14ac:knownFonts="1">
@@ -50,9 +51,10 @@
50
51
  <xf numFmtId="0" fontId="1" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
51
52
  <xf numFmtId="0" fontId="2" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
52
53
  </cellStyleXfs>
53
- <cellXfs count="3">
54
+ <cellXfs count="4">
54
55
  <xf numFmtId="0" fontId="0" fillId="0" borderId="0" xfId="0"/>
55
56
  <xf numFmtId="164" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
57
+ <xf numFmtId="59" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
56
58
  <xf numFmtId="1" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
57
59
  </cellXfs>
58
60
  <cellStyles count="3">
@@ -2,6 +2,7 @@ gem 'minitest'
2
2
  require 'minitest/autorun'
3
3
  require 'minitest/spec'
4
4
  require 'pry'
5
+ require 'time'
5
6
 
6
7
  $:.unshift File.expand_path("lib")
7
8
  require 'simple_xlsx_reader'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-02 00:00:00.000000000 Z
11
+ date: 2015-02-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -86,6 +86,8 @@ files:
86
86
  - test/date1904_test.rb
87
87
  - test/datetime_test.rb
88
88
  - test/datetimes.xlsx
89
+ - test/lower_case_sharedstrings.xlsx
90
+ - test/lower_case_sharedstrings_test.rb
89
91
  - test/performance_test.rb
90
92
  - test/sesame_street_blog.xlsx
91
93
  - test/shared_strings.xml
@@ -111,7 +113,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
111
113
  version: '0'
112
114
  requirements: []
113
115
  rubyforge_project:
114
- rubygems_version: 2.2.0
116
+ rubygems_version: 2.2.2
115
117
  signing_key:
116
118
  specification_version: 4
117
119
  summary: Read xlsx data the Ruby way
@@ -120,6 +122,8 @@ test_files:
120
122
  - test/date1904_test.rb
121
123
  - test/datetime_test.rb
122
124
  - test/datetimes.xlsx
125
+ - test/lower_case_sharedstrings.xlsx
126
+ - test/lower_case_sharedstrings_test.rb
123
127
  - test/performance_test.rb
124
128
  - test/sesame_street_blog.xlsx
125
129
  - test/shared_strings.xml