simple_xlsx_reader 2.0.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 979490ce3bd7f0482879fb5fb5465e10ad1b07c1488d0a544950131d9063050a
4
- data.tar.gz: 412d0040a586cc5ee4acdd4a2f74dd74f3bf9eb781a35d8a36c12f6caadc566c
3
+ metadata.gz: 5449525d4e46a013f92e8406a2ec2d07b06bb795efc7c8d76b9ffbcace22a38f
4
+ data.tar.gz: 5c664baa8d88692767f5bb6d2879e24c27098206695b88da42f3bc0d30bb9bce
5
5
  SHA512:
6
- metadata.gz: 00c01bc0c2a393eb35e458411dfeab55b8bf30cee2661324cbd97a175baf0ceb31a881b1b2b7bd668a2b475ff008372c1428908340e30769308884355fdd46e8
7
- data.tar.gz: 81b1b26806a97c56710cab64aa22212985dea82b308e2fbba6835f4ea7a69b79067268bb13537999594dc5722928f1df235938355a7d4a51b58ae7ed4af1d093
6
+ metadata.gz: 90d5fedde0aa4cc2bcb8b4d9134859890bbf4357efbf6ba9aa0aaf3bd21ad1cd9f54a279d1126938a300dee36fbb8a10d63b66f945c9f7eeb8edc880bb23327a
7
+ data.tar.gz: ab684cc09075a0b9a1054c045bf4202718159b258c87f62b10e18aafa4faaa7a527ba95f8e765f7609ab7ba24c43a31176906ecefd612c85badbdefee9164184
@@ -22,15 +22,12 @@ jobs:
22
22
  runs-on: ubuntu-latest
23
23
  strategy:
24
24
  matrix:
25
- ruby-version: ['2.6', '2.7', '3.0']
25
+ ruby-version: ['2.6', '2.7', '3.0', '3.1', '3.2']
26
26
 
27
27
  steps:
28
28
  - uses: actions/checkout@v3
29
29
  - name: Set up Ruby
30
- # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
31
- # change this to (see https://github.com/ruby/setup-ruby#versioning):
32
- # uses: ruby/setup-ruby@v1
33
- uses: ruby/setup-ruby@2b019609e2b0f1ea1a2bc8ca11cb82ab46ada124
30
+ uses: ruby/setup-ruby@v1
34
31
  with:
35
32
  ruby-version: ${{ matrix.ruby-version }}
36
33
  bundler-cache: true # runs 'bundle install' and caches installed gems automatically
data/CHANGELOG.md CHANGED
@@ -1,3 +1,21 @@
1
+ ### 3.0.0
2
+
3
+ * Change the way we typecast cells in the General format. This probably won't
4
+ break anything in your app, but it's a change in behavior that theoretically
5
+ could.
6
+
7
+ Previously, we were treating cells using the General format as strings, when
8
+ according to the Office XML standard, they should be treated as numbers. We
9
+ now attempt to cast such cells as numbers, and fall back to strings if number
10
+ casting fails.
11
+
12
+ Thanks @jrodrigosm
13
+
14
+ ### 2.0.1
15
+
16
+ * Restore ability to parse IO strings (@robbevp)
17
+ * Add Ruby 3.1 and 3.2 to CI (@taichi-ishitani)
18
+
1
19
  ### 2.0.0
2
20
 
3
21
  * SPEED
@@ -8,14 +8,16 @@ module SimpleXlsxReader
8
8
  # Main class for the public API. See the README for usage examples,
9
9
  # or read the code, it's pretty friendly.
10
10
  class Document
11
- attr_reader :file_path
11
+ attr_reader :string_or_io
12
12
 
13
- def initialize(file_path)
14
- @file_path = file_path
13
+ def initialize(legacy_file_path = nil, file_path: nil, string_or_io: nil)
14
+ fail(ArgumentError, 'either file_path or string_or_io must be provided') if legacy_file_path.nil? && file_path.nil? && string_or_io.nil?
15
+
16
+ @string_or_io = string_or_io || File.new(legacy_file_path || file_path)
15
17
  end
16
18
 
17
19
  def sheets
18
- @sheets ||= Loader.new(file_path).init_sheets
20
+ @sheets ||= Loader.new(string_or_io).init_sheets
19
21
  end
20
22
 
21
23
  # Expensive because it slurps all the sheets into memory,
@@ -1,12 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleXlsxReader
4
- class Loader < Struct.new(:file_path)
4
+ class Loader < Struct.new(:string_or_io)
5
5
  attr_accessor :shared_strings, :sheet_parsers, :sheet_toc, :style_types, :base_date
6
6
 
7
7
  def init_sheets
8
8
  ZipReader.new(
9
- file_path: file_path,
9
+ string_or_io: string_or_io,
10
10
  loader: self
11
11
  ).read
12
12
 
@@ -19,12 +19,12 @@ module SimpleXlsxReader
19
19
  end
20
20
  end
21
21
 
22
- ZipReader = Struct.new(:file_path, :loader, keyword_init: true) do
22
+ ZipReader = Struct.new(:string_or_io, :loader, keyword_init: true) do
23
23
  attr_reader :zip
24
24
 
25
25
  def initialize(*args)
26
26
  super
27
- @zip = SimpleXlsxReader::Zip.open(file_path)
27
+ @zip = SimpleXlsxReader::Zip.open_buffer(string_or_io)
28
28
  end
29
29
 
30
30
  def read
@@ -149,7 +149,13 @@ module SimpleXlsxReader
149
149
  # detected earlier and cast here by its standardized symbol
150
150
  ##
151
151
 
152
- when :string, :unsupported
152
+ # no type encoded with the General format defaults to a number type
153
+ when nil, :string
154
+ retval = Integer(value, exception: false)
155
+ retval ||= Float(value, exception: false)
156
+ retval ||= value
157
+ retval
158
+ when :unsupported
153
159
  value
154
160
  when :fixnum
155
161
  value.to_i
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleXlsxReader
4
- VERSION = '2.0.0'
4
+ VERSION = '3.0.0'
5
5
  end
@@ -42,8 +42,11 @@ module SimpleXlsxReader
42
42
  end
43
43
 
44
44
  def open(file_path)
45
- Document.new(file_path).tap(&:sheets)
45
+ Document.new(file_path: file_path).tap(&:sheets)
46
+ end
47
+
48
+ def parse(string_or_io)
49
+ Document.new(string_or_io: string_or_io).tap(&:sheets)
46
50
  end
47
- alias parse open
48
51
  end
49
52
  end
Binary file
@@ -18,6 +18,7 @@ describe SimpleXlsxReader do
18
18
 
19
19
  let(:sesame_street_blog_file_path) { File.join(File.dirname(__FILE__), 'sesame_street_blog.xlsx') }
20
20
  let(:sesame_street_blog_io) { File.new(sesame_street_blog_file_path) }
21
+ let(:sesame_street_blog_string) { IO.read(sesame_street_blog_file_path) }
21
22
 
22
23
  let(:expected_result) do
23
24
  {
@@ -54,6 +55,14 @@ describe SimpleXlsxReader do
54
55
  end
55
56
  end
56
57
 
58
+ describe 'load from string' do
59
+ let(:subject) { SimpleXlsxReader.parse(sesame_street_blog_io) }
60
+
61
+ it 'reads an xlsx string into a hash of {[sheet name] => [data]}' do
62
+ _(subject.to_hash).must_equal(expected_result)
63
+ end
64
+ end
65
+
57
66
  it 'outputs strings in UTF-8 encoding' do
58
67
  document = SimpleXlsxReader.parse(sesame_street_blog_io)
59
68
  _(document.sheets[0].rows.to_a.flatten.map(&:encoding).uniq)
@@ -818,6 +827,10 @@ describe SimpleXlsxReader do
818
827
  <c r='I1' s='0'>
819
828
  <v>GUI-made hyperlink</v>
820
829
  </c>
830
+
831
+ <c r='J1' s='0'>
832
+ <v>1</v>
833
+ </c>
821
834
  </row>
822
835
  </sheetData>
823
836
 
@@ -916,6 +929,10 @@ describe SimpleXlsxReader do
916
929
  )
917
930
  )
918
931
  end
932
+
933
+ it "reads 'Generic' cells with numbers as numbers" do
934
+ _(@row[9]).must_equal 1
935
+ end
919
936
  end
920
937
 
921
938
  describe 'parsing documents with blank rows' do
@@ -927,7 +944,7 @@ describe SimpleXlsxReader do
927
944
  <sheetData>
928
945
  <row r="2" spans="1:1">
929
946
  <c r="A2" s="0">
930
- <v>0</v>
947
+ <v>a</v>
931
948
  </c>
932
949
  </row>
933
950
  <row r="4" spans="1:1">
@@ -958,13 +975,30 @@ describe SimpleXlsxReader do
958
975
  it 'reads row data despite gaps in row numbering' do
959
976
  _(@rows).must_equal [
960
977
  [nil, nil, nil, nil],
961
- ['0', nil, nil, nil],
978
+ ['a', nil, nil, nil],
962
979
  [nil, nil, nil, nil],
963
- [nil, '1', nil, nil],
964
- [nil, nil, '2', nil],
980
+ [nil, 1, nil, nil],
981
+ [nil, nil, 2, nil],
965
982
  [nil, nil, nil, nil],
966
- [nil, nil, nil, '3']
983
+ [nil, nil, nil, 3]
967
984
  ]
968
985
  end
969
986
  end
987
+
988
+ # https://support.microsoft.com/en-us/office/available-number-formats-in-excel-0afe8f52-97db-41f1-b972-4b46e9f1e8d2
989
+ describe 'numeric fields styled as "General"' do
990
+ let(:misc_numbers_path) do
991
+ File.join(File.dirname(__FILE__), 'misc_numbers.xlsx')
992
+ end
993
+
994
+ let(:sheet) { SimpleXlsxReader.open(misc_numbers_path).sheets[0] }
995
+
996
+ it 'reads medium sized integers as integers' do
997
+ _(sheet.rows.slurp[1][0]).must_equal 98070
998
+ end
999
+
1000
+ it 'reads large (>12 char) integers as integers' do
1001
+ _(sheet.rows.slurp[1][1]).must_equal 1234567890123
1002
+ end
1003
+ end
970
1004
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_xlsx_reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 3.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Woody Peterson
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-08-18 00:00:00.000000000 Z
11
+ date: 2023-03-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -113,6 +113,7 @@ files:
113
113
  - test/gdocs_sheet_test.rb
114
114
  - test/lower_case_sharedstrings.xlsx
115
115
  - test/lower_case_sharedstrings_test.rb
116
+ - test/misc_numbers.xlsx
116
117
  - test/performance_test.rb
117
118
  - test/sesame_street_blog.xlsx
118
119
  - test/shared_strings.xml
@@ -152,6 +153,7 @@ test_files:
152
153
  - test/gdocs_sheet_test.rb
153
154
  - test/lower_case_sharedstrings.xlsx
154
155
  - test/lower_case_sharedstrings_test.rb
156
+ - test/misc_numbers.xlsx
155
157
  - test/performance_test.rb
156
158
  - test/sesame_street_blog.xlsx
157
159
  - test/shared_strings.xml