xsv 0.3.11 → 0.3.16

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 601dc989cf2bdf62aa50002b0293a387df53bf20022fd16fb2f8885c344e9013
4
- data.tar.gz: 6afb26f05ad129280b823540cb3d0e957ab88ea01b877d1758d8c2b376d2d69a
3
+ metadata.gz: 3ab67d9fe191b18a010c3526e270f7a528189b8a8ad4496ff5cc8100497c8dfd
4
+ data.tar.gz: 3f7aec9bad6532dda901764d7c80342ab0763370bf0475d9159cc42730a8405e
5
5
  SHA512:
6
- metadata.gz: 1e946848856a07d1b95fa22f9f97b22fa4bc9fcb7d6b5985b9ff7a9e2ec4e8dc42f74964ec6c2872f22c80773c7f5ca2188e857af32f1f51345d7feebf77047c
7
- data.tar.gz: f1ff12b091a4fcd8172064ea6f215011bc43e95bd5dcec586d189b72eb0ef0662eaf48b85ac7794ba82a27d824d61459025ed676a4e7b729be7e71f957cc48bd
6
+ metadata.gz: 740bb30608da907db4ea9697587dd6da4c2cfc9032fa7196eb1dccfe1912ea77387a0545bb81980f0031f71b746cb6a7c1628aef4f1f35c2697fd68dac80379f
7
+ data.tar.gz: 288c520bf0b01a3ebb6463dba02bb63ed43eb9fa033ee5e19f1891411cc0c6554415f18455d07d94b45d4c8a56469a00e20d85794357eb48cf5106821458aee5
@@ -4,8 +4,7 @@ language: ruby
4
4
  cache: bundler
5
5
  rvm:
6
6
  - 2.5.8
7
- - 2.6.5
8
- - 2.7.0
7
+ - 2.7.1
9
8
  env:
10
9
  - "rubyzip=1.3.0"
11
10
  - "rubyzip=2.2.0"
@@ -1,5 +1,29 @@
1
1
  # Xsv Changelog
2
2
 
3
+ ## 0.3.16 2020-06-03
4
+
5
+ - Support complex numbers (#16)
6
+
7
+ ## 0.3.15 2020-06-02
8
+
9
+ - Fix issue with workbooks that don't contain shared strings (#15)
10
+
11
+ ## 0.3.14 2020-05-22
12
+
13
+ - Allow opening workbooks from Tempfile and anything that responds to #read
14
+
15
+ - Preserve whitespace in text cells
16
+
17
+ ## 0.3.13 2020-05-12
18
+
19
+ - Add Sheet#hidden?
20
+
21
+ - Clean up code; get rid of some deprecation warnings
22
+
23
+ ## 0.3.12 - 2020-04-15
24
+
25
+ - Accessing worksheets by name (texpert)
26
+
3
27
  ## 0.3.11 - 2020-04-03
4
28
 
5
29
  - Backward compatibility with Ruby 2.5 (texpert)
data/Gemfile CHANGED
@@ -1,6 +1,6 @@
1
1
  source "https://rubygems.org"
2
2
 
3
- git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
3
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
4
4
 
5
5
  # Specify your gem's dependencies in xsv.gemspec
6
6
  gemspec
data/README.md CHANGED
@@ -83,6 +83,22 @@ columns with the same name!
83
83
  `Xsv::Sheet` implements `Enumerable` so you can call methods like `#first`,
84
84
  `#filter`/`#select` and `#map` on it.
85
85
 
86
+ Sheets can be accessed by index or by name:
87
+
88
+ ```ruby
89
+ x = Xsv::Workbook.open("sheet.xlsx")
90
+
91
+ sheet = x.sheets[0] # gets sheet by index
92
+
93
+ sheet = x.sheets_by_name('Name').first # gets sheet by name
94
+ ```
95
+
96
+ To get the names of all sheets in the workbook:
97
+
98
+ ```ruby
99
+ sheet_names = x.sheets.map(&:name)
100
+ ```
101
+
86
102
  ### Assumptions
87
103
 
88
104
  Since Xsv treats worksheets like csv files it makes certain assumptions about your
@@ -107,6 +123,14 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run
107
123
 
108
124
  To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
109
125
 
126
+ ## Performance and Benchmarks
127
+
128
+ Xsv is faster and more memory efficient than other gems for two reasons: it only _reads values_ from Excel files, and it uses a SAX-based parser instead of a DOM-based parser. If you want to read some background on this, check out my blog post on
129
+ [Efficient XML parsing in Ruby](https://storck.io/posts/efficient-xml-parsing-in-ruby/).
130
+
131
+ Jamie Schembri did a shootout of Xsv against various other Excel reading gems comparing parsing speed, memory usage and allocations.
132
+ Check out his blog post: [Faster Excel parsing in Ruby](https://blog.schembri.me/post/faster-excel-parsing-in-ruby/).
133
+
110
134
  ## Contributing
111
135
 
112
136
  Bug reports and pull requests are welcome on GitHub at https://github.com/martijn/xsv.
data/Rakefile CHANGED
@@ -14,4 +14,3 @@ Rake::TestTask.new(:bench) do |t|
14
14
  end
15
15
 
16
16
  task :default => [:test, :bench]
17
-
data/lib/xsv.rb CHANGED
@@ -3,10 +3,12 @@ require "date"
3
3
  require "ox"
4
4
 
5
5
  require "xsv/helpers"
6
+ require "xsv/relationships_handler"
6
7
  require "xsv/shared_strings_parser"
7
8
  require "xsv/sheet"
8
9
  require "xsv/sheet_bounds_handler"
9
10
  require "xsv/sheet_rows_handler"
11
+ require "xsv/sheets_ids_handler"
10
12
  require "xsv/styles_handler"
11
13
  require "xsv/version"
12
14
  require "xsv/workbook"
@@ -17,6 +19,7 @@ require "xsv/workbook"
17
19
  # deals with minimal formatting and cannot create or modify documents.
18
20
  module Xsv
19
21
  class Error < StandardError; end
22
+
20
23
  # An AssertionFailed error indicates an unexpected condition, meaning a bug
21
24
  # or misinterpreted .xlsx document
22
25
  class AssertionFailed < StandardError; end
@@ -39,7 +39,7 @@ module Xsv
39
39
 
40
40
  MINUTE = 60.freeze
41
41
  HOUR = 3600.freeze
42
- A_CODEPOINT = 'A'.ord.freeze
42
+ A_CODEPOINT = "A".ord.freeze
43
43
  # The epoch for all dates in OOXML Spreadsheet documents
44
44
  EPOCH = Date.new(1899, 12, 30).freeze
45
45
 
@@ -53,7 +53,7 @@ module Xsv
53
53
 
54
54
  # Return a Date for the given Excel date value
55
55
  def parse_date(number)
56
- EPOCH + number
56
+ EPOCH + number
57
57
  end
58
58
 
59
59
  # Return a time as a string for the given Excel time value
@@ -94,6 +94,8 @@ module Xsv
94
94
  def parse_number(string)
95
95
  if string.include? "."
96
96
  string.to_f
97
+ elsif string.include? "E"
98
+ Complex(string).to_f
97
99
  else
98
100
  string.to_i
99
101
  end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+ module Xsv
3
+ # RelationshipsHandler parses the "xl/_rels/workbook.xml.rels" file to get the existing relationships.
4
+ # This is used internally when opening a workbook.
5
+ class RelationshipsHandler < Ox::Sax
6
+ def self.get_relations(io)
7
+ relations = []
8
+ handler = new do |relation|
9
+ relations << relation
10
+ end
11
+
12
+ Ox.sax_parse(handler, io.read)
13
+ return relations
14
+ end
15
+
16
+ # Ox::Sax implementation
17
+
18
+ def initialize(&block)
19
+ @block = block
20
+ @relationship = {}
21
+ end
22
+
23
+ def start_element(name)
24
+ @relationship = {} if name == :Relationship
25
+ end
26
+
27
+ def attr(name, value)
28
+ case name
29
+ when :Id, :Type, :Target
30
+ @relationship[name] = value
31
+ end
32
+ end
33
+
34
+ def end_element(name)
35
+ return unless name == :Relationship
36
+
37
+ @block.call(@relationship)
38
+ end
39
+ end
40
+ end
@@ -6,7 +6,7 @@ module Xsv
6
6
  def self.parse(io)
7
7
  strings = []
8
8
  handler = new { |s| strings << s }
9
- Ox.sax_parse(handler, io.read)
9
+ Ox.sax_parse(handler, io.read, skip: :skip_none)
10
10
  return strings
11
11
  end
12
12
 
@@ -17,7 +17,7 @@ module Xsv
17
17
 
18
18
  # Returns the current mode. Call {#parse_headers!} to switch to `:hash` mode
19
19
  # @return [Symbol] `:hash` or `:array`
20
- attr_reader :mode
20
+ attr_reader :id, :mode, :name
21
21
 
22
22
  # Set a number of rows to skip at the top of the sheet (header row offset).
23
23
  # For hash mode, do not skip the header row as this will be automatically
@@ -30,13 +30,16 @@ module Xsv
30
30
  # @param workbook [Workbook] The Workbook with shared data such as shared strings and styles
31
31
  # @param io [IO] A handle to an open worksheet XML file
32
32
  # @param size [Number] size of the XML file
33
- def initialize(workbook, io, size)
33
+ def initialize(workbook, io, size, ids)
34
34
  @workbook = workbook
35
+ @id = ids[:sheetId].to_i
35
36
  @io = io
37
+ @name = ids[:name]
36
38
  @size = size
37
39
  @headers = []
38
40
  @mode = :array
39
41
  @row_skip = 0
42
+ @hidden = ids[:state] == "hidden"
40
43
 
41
44
  @last_row, @column_count = SheetBoundsHandler.get_bounds(@io, @workbook)
42
45
  end
@@ -46,6 +49,11 @@ module Xsv
46
49
  "#<#{self.class.name}:#{self.object_id}>"
47
50
  end
48
51
 
52
+ # Returns true if the worksheet is hidden
53
+ def hidden?
54
+ @hidden
55
+ end
56
+
49
57
  # Iterate over rows, returning either hashes or arrays based on the current mode.
50
58
  def each_row(&block)
51
59
  @io.rewind
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+ module Xsv
3
+ # SheetsIdsHandler interprets the relevant parts of workbook.xml
4
+ # This is used internally to get the sheets ids, relationship_ids, and names when opening a workbook.
5
+ class SheetsIdsHandler < Ox::Sax
6
+ def self.get_sheets_ids(io)
7
+ sheets_ids = []
8
+ handler = new do |sheet_ids|
9
+ sheets_ids << sheet_ids
10
+ end
11
+
12
+ Ox.sax_parse(handler, io.read)
13
+ return sheets_ids
14
+ end
15
+
16
+ # Ox::Sax implementation
17
+
18
+ def initialize(&block)
19
+ @block = block
20
+ @parsing = false
21
+ end
22
+
23
+ def start_element(name)
24
+ if name == :sheets
25
+ @parsing = true
26
+ return
27
+ end
28
+
29
+ return unless name == :sheet
30
+
31
+ @sheet_ids = {}
32
+ end
33
+
34
+ def attr(name, value)
35
+ return unless @parsing
36
+
37
+ case name
38
+ when :name, :sheetId, :state
39
+ @sheet_ids[name] = value
40
+ when :'r:id'
41
+ @sheet_ids[:r_id] = value
42
+ end
43
+ end
44
+
45
+ def end_element(name)
46
+ if name == :sheets
47
+ @parsing = false
48
+ return
49
+ end
50
+
51
+ return unless name == :sheet
52
+
53
+ @block.call(@sheet_ids)
54
+ end
55
+ end
56
+ end
@@ -7,8 +7,8 @@ module Xsv
7
7
  @xfs = nil
8
8
  @numFmts = nil
9
9
  handler = new(numFmts) do |xfs, numFmts|
10
- @xfs = xfs
11
- @numFmts = numFmts
10
+ @xfs = xfs
11
+ @numFmts = numFmts
12
12
  end
13
13
 
14
14
  Ox.sax_parse(handler, io.read)
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
- VERSION = "0.3.11"
3
+ VERSION = "0.3.16"
4
4
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
- require 'zip'
2
+ require "zip"
3
3
 
4
4
  module Xsv
5
5
  # An OOXML Spreadsheet document is called a Workbook. A Workbook consists of
@@ -15,11 +15,11 @@ module Xsv
15
15
  # Open the workbook of the given filename, string or buffer. For additional
16
16
  # options see {.initialize}
17
17
  def self.open(data, **kws)
18
- if data.is_a?(IO)
18
+ if data.is_a?(IO) || data.respond_to?(:read) # is it a buffer?
19
19
  @workbook = self.new(Zip::File.open_buffer(data), **kws)
20
- elsif data.start_with?("PK\x03\x04")
20
+ elsif data.start_with?("PK\x03\x04") # is it a string containing the workbook (zip) data?
21
21
  @workbook = self.new(Zip::File.open_buffer(data), **kws)
22
- else
22
+ else # must be a filename
23
23
  @workbook = self.new(Zip::File.open(data), **kws)
24
24
  end
25
25
  end
@@ -41,6 +41,8 @@ module Xsv
41
41
 
42
42
  fetch_shared_strings
43
43
  fetch_styles
44
+ fetch_sheets_ids
45
+ fetch_relationships
44
46
  fetch_sheets
45
47
  end
46
48
 
@@ -56,15 +58,27 @@ module Xsv
56
58
  @sheets = nil
57
59
  @xfs = nil
58
60
  @numFmts = nil
61
+ @relationships = nil
59
62
  @shared_strings = nil
63
+ @sheets_ids = nil
60
64
 
61
65
  true
62
66
  end
63
67
 
68
+ # Returns every sheet with the given name, as an array, since several sheets may share a name.
69
+ # @param [String] name
70
+ # @return [Array<Xsv::Sheet>]
71
+ def sheets_by_name(name)
72
+ @sheets.select { |s| s.name == name }
73
+ end
74
+
64
75
  private
65
76
 
66
77
  def fetch_shared_strings
67
- stream = @zip.glob("xl/sharedStrings.xml").first.get_input_stream
78
+ handle = @zip.glob("xl/sharedStrings.xml").first
79
+ return if handle.nil?
80
+
81
+ stream = handle.get_input_stream
68
82
  @shared_strings = SharedStringsParser.parse(stream)
69
83
 
70
84
  stream.close
@@ -80,8 +94,24 @@ module Xsv
80
94
  @zip.glob("xl/worksheets/sheet*.xml").sort do |a, b|
81
95
  a.name[/\d+/].to_i <=> b.name[/\d+/].to_i
82
96
  end.each do |entry|
83
- @sheets << Xsv::Sheet.new(self, entry.get_input_stream, entry.size)
97
+ rel = @relationships.detect { |r| entry.name.end_with?(r[:Target]) && r[:Type].end_with?("worksheet") }
98
+ sheet_ids = @sheets_ids.detect { |i| i[:r_id] == rel[:Id] }
99
+ @sheets << Xsv::Sheet.new(self, entry.get_input_stream, entry.size, sheet_ids)
84
100
  end
85
101
  end
102
+
103
+ def fetch_sheets_ids
104
+ stream = @zip.glob("xl/workbook.xml").first.get_input_stream
105
+ @sheets_ids = SheetsIdsHandler.get_sheets_ids(stream)
106
+
107
+ stream.close
108
+ end
109
+
110
+ def fetch_relationships
111
+ stream = @zip.glob("xl/_rels/workbook.xml.rels").first.get_input_stream
112
+ @relationships = RelationshipsHandler.get_relations(stream)
113
+
114
+ stream.close
115
+ end
86
116
  end
87
117
  end
@@ -1,23 +1,22 @@
1
-
2
1
  lib = File.expand_path("../lib", __FILE__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require "xsv/version"
5
4
 
6
5
  Gem::Specification.new do |spec|
7
- spec.name = "xsv"
8
- spec.version = Xsv::VERSION
9
- spec.authors = ["Martijn Storck"]
10
- spec.email = ["martijn@storck.io"]
6
+ spec.name = "xsv"
7
+ spec.version = Xsv::VERSION
8
+ spec.authors = ["Martijn Storck"]
9
+ spec.email = ["martijn@storck.io"]
11
10
 
12
- spec.summary = "A fast and lightweiggt xlsx parser that provides nothing a CSV parser wouldn't"
13
- spec.description = <<-EOF
11
+ spec.summary = "A fast and lightweiggt xlsx parser that provides nothing a CSV parser wouldn't"
12
+ spec.description = <<-EOF
14
13
  Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
15
14
  (commonly known as Excel or .xlsx files). It strives to be minimal in the
16
15
  sense that it provides nothing a CSV reader wouldn't, meaning it only
17
16
  deals with minimal formatting and cannot create or modify documents.
18
17
  EOF
19
- spec.homepage = "https://github.com/martijn/xsv"
20
- spec.license = "MIT"
18
+ spec.homepage = "https://github.com/martijn/xsv"
19
+ spec.license = "MIT"
21
20
 
22
21
  if spec.respond_to?(:metadata)
23
22
  spec.metadata["homepage_uri"] = spec.homepage
@@ -25,19 +24,19 @@ Gem::Specification.new do |spec|
25
24
  spec.metadata["changelog_uri"] = "https://github.com/martijn/xsv/CHANGELOG.md"
26
25
  else
27
26
  raise "RubyGems 2.0 or newer is required to protect against " \
28
- "public gem pushes."
27
+ "public gem pushes."
29
28
  end
30
29
 
31
30
  # Specify which files should be added to the gem when it is released.
32
31
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
33
- spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
32
+ spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do
34
33
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
35
34
  end
36
- spec.bindir = "exe"
37
- spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
35
+ spec.bindir = "exe"
36
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
38
37
  spec.require_paths = ["lib"]
39
38
 
40
- spec.required_ruby_version = '~> 2.5'
39
+ spec.required_ruby_version = "~> 2.5"
41
40
 
42
41
  spec.add_dependency "rubyzip", ">= 1.3", "< 3"
43
42
  spec.add_dependency "ox", ">= 2.9"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.11
4
+ version: 0.3.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martijn Storck
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-04-03 00:00:00.000000000 Z
11
+ date: 2020-06-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip
@@ -108,14 +108,15 @@ files:
108
108
  - bin/setup
109
109
  - lib/xsv.rb
110
110
  - lib/xsv/helpers.rb
111
+ - lib/xsv/relationships_handler.rb
111
112
  - lib/xsv/shared_strings_parser.rb
112
113
  - lib/xsv/sheet.rb
113
114
  - lib/xsv/sheet_bounds_handler.rb
114
115
  - lib/xsv/sheet_rows_handler.rb
116
+ - lib/xsv/sheets_ids_handler.rb
115
117
  - lib/xsv/styles_handler.rb
116
118
  - lib/xsv/version.rb
117
119
  - lib/xsv/workbook.rb
118
- - test.sh
119
120
  - xsv.gemspec
120
121
  homepage: https://github.com/martijn/xsv
121
122
  licenses:
data/test.sh DELETED
@@ -1,3 +0,0 @@
1
- #!/bin/sh
2
-
3
- ruby -Ilib:test test/*_test.rb