xsv 0.3.12 → 0.3.17

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 36eeca38d2791f71dafba9b1ae6308e63b3554efd98e2949448c4715863b7616
4
- data.tar.gz: cf90d5ff1def79ee8b17877523a5e8233b7c5253801c98eb0e725724a6e3f48d
3
+ metadata.gz: e04167014a265ca3a3551f185b1a37849cae430bda9fa5cd60f7a302643c3722
4
+ data.tar.gz: 9aced295617b61bb29b6d8c5d087722f73176dc555490fb231a0c5ba05472349
5
5
  SHA512:
6
- metadata.gz: cc203883d561d86a51eb35c11f4c4547929605387a43d38c16eb48f975a2da1ea332c7cbb4aab21e4760424ceb015de2e4d50aeeddd9bf595ccd3bf1f363ac63
7
- data.tar.gz: b91d7d210c6894c17bfbb2b40dd0c243f2c5228f35c780f8e5d8afba9367303d2df1897ae734bec636f8eab615cda7188d63e63b270b3f543fafc39d91d1aa7e
6
+ metadata.gz: fb97e738cffac1e980a1cfedbd2ed3def2f66d72f78384623be80ccf89cb838bb571a369f10277c066eeb1bf1fb10c19feb542cedc1cb4ffd6463550a459e81c
7
+ data.tar.gz: 20c758923b7ed3efd3b847ca74bbe6f5683a7f97f2facca0fedd1ca75011b8fab00caab3b44cd909357d9fed5b50808fecadb06b439b463988a5cdd59f88d374
@@ -4,8 +4,7 @@ language: ruby
4
4
  cache: bundler
5
5
  rvm:
6
6
  - 2.5.8
7
- - 2.6.5
8
- - 2.7.0
7
+ - 2.7.1
9
8
  env:
10
9
  - "rubyzip=1.3.0"
11
10
  - "rubyzip=2.2.0"
@@ -1,5 +1,29 @@
1
1
  # Xsv Changelog
2
2
 
3
+ ## 0.3.17 2020-07-03
4
+
5
+ - Fix parsing of empty worksheets (#17)
6
+
7
+ ## 0.3.16 2020-06-03
8
+
9
+ - Support complex numbers (#16)
10
+
11
+ ## 0.3.15 2020-06-02
12
+
13
+ - Fix issue with workbooks that don't contain shared strings (#15)
14
+
15
+ ## 0.3.14 2020-05-22
16
+
17
+ - Allow opening workbooks from Tempfile and anything that responds to #read
18
+
19
+ - Preserve whitespace in text cells
20
+
21
+ ## 0.3.13 2020-05-12
22
+
23
+ - Add Sheet#hidden?
24
+
25
+ - Clean up code; get rid of some deprecation warnings
26
+
3
27
  ## 0.3.12 - 2020-04-15
4
28
 
5
29
  - Accessing worksheets by name (texpert)
data/Gemfile CHANGED
@@ -1,6 +1,6 @@
1
1
  source "https://rubygems.org"
2
2
 
3
- git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
3
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
4
4
 
5
5
  # Specify your gem's dependencies in xsv.gemspec
6
6
  gemspec
data/README.md CHANGED
@@ -123,6 +123,14 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
123
123
 
124
124
  To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
125
125
 
126
+ ## Performance and Benchmarks
127
+
128
+ Xsv is faster and more memory efficient than other gems because of two things: it only _reads values_ from Excel files and it's built on a SAX parser instead of a DOM parser. If you want to read some background on this, check out my blog post on
129
+ [Efficient XML parsing in Ruby](https://storck.io/posts/efficient-xml-parsing-in-ruby/).
130
+
131
+ Jamie Schembri did a shootout of Xsv against various other Excel reading gems comparing parsing speed, memory usage and allocations.
132
+ Check out his blog post: [Faster Excel parsing in Ruby](https://blog.schembri.me/post/faster-excel-parsing-in-ruby/).
133
+
126
134
  ## Contributing
127
135
 
128
136
  Bug reports and pull requests are welcome on GitHub at https://github.com/martijn/xsv.
data/Rakefile CHANGED
@@ -14,4 +14,3 @@ Rake::TestTask.new(:bench) do |t|
14
14
  end
15
15
 
16
16
  task :default => [:test, :bench]
17
-
data/lib/xsv.rb CHANGED
@@ -8,7 +8,7 @@ require "xsv/shared_strings_parser"
8
8
  require "xsv/sheet"
9
9
  require "xsv/sheet_bounds_handler"
10
10
  require "xsv/sheet_rows_handler"
11
- require 'xsv/sheets_ids_handler'
11
+ require "xsv/sheets_ids_handler"
12
12
  require "xsv/styles_handler"
13
13
  require "xsv/version"
14
14
  require "xsv/workbook"
@@ -19,6 +19,7 @@ require "xsv/workbook"
19
19
  # deals with minimal formatting and cannot create or modify documents.
20
20
  module Xsv
21
21
  class Error < StandardError; end
22
+
22
23
  # An AssertionFailed error indicates an unexpected condition, meaning a bug
23
24
  # or misinterpreted .xlsx document
24
25
  class AssertionFailed < StandardError; end
@@ -39,7 +39,7 @@ module Xsv
39
39
 
40
40
  MINUTE = 60.freeze
41
41
  HOUR = 3600.freeze
42
- A_CODEPOINT = 'A'.ord.freeze
42
+ A_CODEPOINT = "A".ord.freeze
43
43
  # The epoch for all dates in OOXML Spreadsheet documents
44
44
  EPOCH = Date.new(1899, 12, 30).freeze
45
45
 
@@ -53,7 +53,7 @@ module Xsv
53
53
 
54
54
  # Return a Date for the given Excel date value
55
55
  def parse_date(number)
56
- EPOCH + number
56
+ EPOCH + number
57
57
  end
58
58
 
59
59
  # Return a time as a string for the given Excel time value
@@ -94,6 +94,8 @@ module Xsv
94
94
  def parse_number(string)
95
95
  if string.include? "."
96
96
  string.to_f
97
+ elsif string.include? "E"
98
+ Complex(string).to_f
97
99
  else
98
100
  string.to_i
99
101
  end
@@ -17,6 +17,7 @@ module Xsv
17
17
 
18
18
  def initialize(&block)
19
19
  @block = block
20
+ @relationship = {}
20
21
  end
21
22
 
22
23
  def start_element(name)
@@ -25,8 +26,8 @@ module Xsv
25
26
 
26
27
  def attr(name, value)
27
28
  case name
28
- when :Id, :Type, :Target
29
- @relationship[name] = value
29
+ when :Id, :Type, :Target
30
+ @relationship[name] = value
30
31
  end
31
32
  end
32
33
 
@@ -6,7 +6,7 @@ module Xsv
6
6
  def self.parse(io)
7
7
  strings = []
8
8
  handler = new { |s| strings << s }
9
- Ox.sax_parse(handler, io.read)
9
+ Ox.sax_parse(handler, io.read, skip: :skip_none)
10
10
  return strings
11
11
  end
12
12
 
@@ -39,6 +39,7 @@ module Xsv
39
39
  @headers = []
40
40
  @mode = :array
41
41
  @row_skip = 0
42
+ @hidden = ids[:state] == "hidden"
42
43
 
43
44
  @last_row, @column_count = SheetBoundsHandler.get_bounds(@io, @workbook)
44
45
  end
@@ -48,6 +49,11 @@ module Xsv
48
49
  "#<#{self.class.name}:#{self.object_id}>"
49
50
  end
50
51
 
52
+ # Returns true if the worksheet is hidden
53
+ def hidden?
54
+ @hidden
55
+ end
56
+
51
57
  # Iterate over rows, returning either hashes or arrays based on the current mode.
52
58
  def each_row(&block)
53
59
  @io.rewind
@@ -106,7 +112,7 @@ module Xsv
106
112
  elsif @mode == :hash
107
113
  @mode = :array
108
114
  headers.tap { @mode = :hash }
109
- end
115
+ end || []
110
116
  end
111
117
 
112
118
  def empty_row
@@ -17,10 +17,14 @@ module Xsv
17
17
 
18
18
  def initialize(&block)
19
19
  @block = block
20
+ @parsing = false
20
21
  end
21
22
 
22
23
  def start_element(name)
23
- return @parsing = true if name == :sheets
24
+ if name == :sheets
25
+ @parsing = true
26
+ return
27
+ end
24
28
 
25
29
  return unless name == :sheet
26
30
 
@@ -31,15 +35,18 @@ module Xsv
31
35
  return unless @parsing
32
36
 
33
37
  case name
34
- when :name, :sheetId
35
- @sheet_ids[name] = value
36
- when :'r:id'
37
- @sheet_ids[:r_id] = value
38
+ when :name, :sheetId, :state
39
+ @sheet_ids[name] = value
40
+ when :'r:id'
41
+ @sheet_ids[:r_id] = value
38
42
  end
39
43
  end
40
44
 
41
45
  def end_element(name)
42
- return @parsing = false if name == :sheets
46
+ if name == :sheets
47
+ @parsing = false
48
+ return
49
+ end
43
50
 
44
51
  return unless name == :sheet
45
52
 
@@ -7,8 +7,8 @@ module Xsv
7
7
  @xfs = nil
8
8
  @numFmts = nil
9
9
  handler = new(numFmts) do |xfs, numFmts|
10
- @xfs = xfs
11
- @numFmts = numFmts
10
+ @xfs = xfs
11
+ @numFmts = numFmts
12
12
  end
13
13
 
14
14
  Ox.sax_parse(handler, io.read)
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
- VERSION = "0.3.12"
3
+ VERSION = "0.3.17"
4
4
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
- require 'zip'
2
+ require "zip"
3
3
 
4
4
  module Xsv
5
5
  # An OOXML Spreadsheet document is called a Workbook. A Workbook consists of
@@ -15,11 +15,11 @@ module Xsv
15
15
  # Open the workbook of the given filename, string or buffer. For additional
16
16
  # options see {.initialize}
17
17
  def self.open(data, **kws)
18
- if data.is_a?(IO)
18
+ if data.is_a?(IO) || data.respond_to?(:read) # is it a buffer?
19
19
  @workbook = self.new(Zip::File.open_buffer(data), **kws)
20
- elsif data.start_with?("PK\x03\x04")
20
+ elsif data.start_with?("PK\x03\x04") # is it a string containing the zip file data itself?
21
21
  @workbook = self.new(Zip::File.open_buffer(data), **kws)
22
- else
22
+ else # must be a filename
23
23
  @workbook = self.new(Zip::File.open(data), **kws)
24
24
  end
25
25
  end
@@ -75,7 +75,10 @@ module Xsv
75
75
  private
76
76
 
77
77
  def fetch_shared_strings
78
- stream = @zip.glob("xl/sharedStrings.xml").first.get_input_stream
78
+ handle = @zip.glob("xl/sharedStrings.xml").first
79
+ return if handle.nil?
80
+
81
+ stream = handle.get_input_stream
79
82
  @shared_strings = SharedStringsParser.parse(stream)
80
83
 
81
84
  stream.close
@@ -91,7 +94,7 @@ module Xsv
91
94
  @zip.glob("xl/worksheets/sheet*.xml").sort do |a, b|
92
95
  a.name[/\d+/].to_i <=> b.name[/\d+/].to_i
93
96
  end.each do |entry|
94
- rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?('worksheet') }
97
+ rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?("worksheet") }
95
98
  sheet_ids = @sheets_ids.detect { |i| i[:r_id] == rel[:Id] }
96
99
  @sheets << Xsv::Sheet.new(self, entry.get_input_stream, entry.size, sheet_ids)
97
100
  end
@@ -1,23 +1,22 @@
1
-
2
1
  lib = File.expand_path("../lib", __FILE__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require "xsv/version"
5
4
 
6
5
  Gem::Specification.new do |spec|
7
- spec.name = "xsv"
8
- spec.version = Xsv::VERSION
9
- spec.authors = ["Martijn Storck"]
10
- spec.email = ["martijn@storck.io"]
6
+ spec.name = "xsv"
7
+ spec.version = Xsv::VERSION
8
+ spec.authors = ["Martijn Storck"]
9
+ spec.email = ["martijn@storck.io"]
11
10
 
12
- spec.summary = "A fast and lightweiggt xlsx parser that provides nothing a CSV parser wouldn't"
13
- spec.description = <<-EOF
11
+ spec.summary = "A fast and lightweiggt xlsx parser that provides nothing a CSV parser wouldn't"
12
+ spec.description = <<-EOF
14
13
  Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
15
14
  (commonly known as Excel or .xlsx files). It strives to be minimal in the
16
15
  sense that it provides nothing a CSV reader wouldn't, meaning it only
17
16
  deals with minimal formatting and cannot create or modify documents.
18
17
  EOF
19
- spec.homepage = "https://github.com/martijn/xsv"
20
- spec.license = "MIT"
18
+ spec.homepage = "https://github.com/martijn/xsv"
19
+ spec.license = "MIT"
21
20
 
22
21
  if spec.respond_to?(:metadata)
23
22
  spec.metadata["homepage_uri"] = spec.homepage
@@ -25,19 +24,19 @@ Gem::Specification.new do |spec|
25
24
  spec.metadata["changelog_uri"] = "https://github.com/martijn/xsv/CHANGELOG.md"
26
25
  else
27
26
  raise "RubyGems 2.0 or newer is required to protect against " \
28
- "public gem pushes."
27
+ "public gem pushes."
29
28
  end
30
29
 
31
30
  # Specify which files should be added to the gem when it is released.
32
31
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
33
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
32
+ spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do
34
33
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
35
34
  end
36
- spec.bindir = "exe"
37
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
35
+ spec.bindir = "exe"
36
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
38
37
  spec.require_paths = ["lib"]
39
38
 
40
- spec.required_ruby_version = '~> 2.5'
39
+ spec.required_ruby_version = "~> 2.5"
41
40
 
42
41
  spec.add_dependency "rubyzip", ">= 1.3", "< 3"
43
42
  spec.add_dependency "ox", ">= 2.9"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.12
4
+ version: 0.3.17
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martijn Storck
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-04-15 00:00:00.000000000 Z
11
+ date: 2020-07-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip
@@ -117,7 +117,6 @@ files:
117
117
  - lib/xsv/styles_handler.rb
118
118
  - lib/xsv/version.rb
119
119
  - lib/xsv/workbook.rb
120
- - test.sh
121
120
  - xsv.gemspec
122
121
  homepage: https://github.com/martijn/xsv
123
122
  licenses:
data/test.sh DELETED
@@ -1,3 +0,0 @@
1
- #!/bin/sh
2
-
3
- ruby -Ilib:test test/*_test.rb