xsv 0.3.5 → 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4ebd402667e57a9db010846048605e8afa079fcac62a71020857c792446a81ba
4
- data.tar.gz: 56702a4f106aa26c9dae4bcca624ca6ba80ccd1287c4e8138469c8215a918333
3
+ metadata.gz: 4c6b42a947e6f518a9284de8262e1c1327267a6e1fdd55ea6d14fffba1f1b19d
4
+ data.tar.gz: '04887e8c4e50d3bb5762c6738237442d3b6360f4b96304b5a802215d7f4240ca'
5
5
  SHA512:
6
- metadata.gz: '02875bc9d2b26fda74f1da6cdfdf1af01d2a0d77735d18cf8c00a5a350bd8f9e2c11129985819f59e8876e51bde140ef163ae18040c251e4caa8fecd420da60f'
7
- data.tar.gz: c9bb1f599cd28175792d86cb92d019906c0b39d67bdf2529021a412dcd91a06a7510da0e71d0a186edef9cd4519c0f466f9325941a613211d19e97ecf50f829f
6
+ metadata.gz: e8ad674f6735a1711190469836f30911bb2ebce502d3e99bc268c1c439c7cf9b8f6841189c76950c1722a0861a769a00b6ee7f15e5ab1cb2763a3d6b05342057
7
+ data.tar.gz: 07c01bc46762348b87f745fd1c7b3aa9d16c0051916b46473a0c0c76bf4d1551f5d2cabc724a612069f3ce83eb959cbe06f4114bef361e615fc37b9ceae51193
data/README.md CHANGED
@@ -1,10 +1,18 @@
1
1
  # Xsv .xlsx reader
2
2
 
3
- Xsv is a very basic parser for Office Open XML spreadsheet files (.xlsx files)
4
- that aims to provide feature parity with common CSV readers with high
5
- performance. This means it only parses values to basic Ruby types and does not
6
- deal with most formatting or more advanced functionality. The goal is to allow
7
- for fast parsing of large worksheets with minimal RAM and CPU consumption.
3
+ [![Travis CI](https://img.shields.io/travis/martijn/xsv/master)](https://travis-ci.org/martijn/xsv)
4
+ [![Yard Docs](http://img.shields.io/badge/yard-docs-blue.svg)](https://rubydoc.info/github/martijn/xsv)
5
+
6
+ Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
7
+ (commonly known as Excel or .xlsx files). It strives to be minimal in the
8
+ sense that it provides nothing a CSV reader wouldn't, meaning it only
9
+ deals with minimal formatting and cannot create or modify documents.
10
+
11
+ Xsv is designed for worksheets with a single table of data, optionally
12
+ with a header row. It only casts values to basic Ruby types (integer, float,
13
+ date and time) and does not deal with most formatting or more advanced
14
+ functionality. The goal is to allow for fast parsing of large worksheets with
15
+ minimal RAM and CPU consumption.
8
16
 
9
17
  Xsv stands for 'Excel Separated Values', because Excel just gets in the way.
10
18
 
@@ -80,9 +88,9 @@ sheet:
80
88
 
81
89
  - In array mode, your data starts on the first row
82
90
 
83
- - In has mode the first row of the sheet contains headers, followed by rows of data
91
+ - In hash mode the first row of the sheet contains headers, followed by rows of data
84
92
 
85
- If your data or headers does not start on the first row of the sheet you can
93
+ If your data or headers do not start on the first row of the sheet you can
86
94
  tell Xsv to skip a number of rows:
87
95
 
88
96
  ```ruby
@@ -6,7 +6,7 @@ module Xsv
6
6
  def self.parse(io)
7
7
  strings = []
8
8
  handler = new { |s| strings << s }
9
- Ox.sax_parse(handler, io)
9
+ Ox.sax_parse(handler, io.read)
10
10
  return strings
11
11
  end
12
12
 
data/lib/xsv/sheet.rb CHANGED
@@ -28,7 +28,7 @@ module Xsv
28
28
  # There is no need to create Sheets from application code.
29
29
  #
30
30
  # @param workbook [Workbook] The Workbook with shared data such as shared strings and styles
31
- # @param io [IO] A handle to an open worksheet XML file
31
+ # @param io [IO, String] A handle to an open worksheet XML file or a string with the XML contents
32
32
  def initialize(workbook, io)
33
33
  @workbook = workbook
34
34
  @io = io
@@ -45,12 +45,10 @@ module Xsv
45
45
  end
46
46
 
47
47
  # Iterate over rows, returning either hashes or arrays based on the current mode.
48
- def each_row
49
- @io.rewind
48
+ def each_row(&block)
49
+ @io.rewind if @io.respond_to?(:rewind)
50
50
 
51
- handler = SheetRowsHandler.new(@mode, empty_row, @workbook, @row_skip, @last_row) do |row|
52
- yield(row)
53
- end
51
+ handler = SheetRowsHandler.new(@mode, empty_row, @workbook, @row_skip, @last_row, &block)
54
52
 
55
53
  Ox.sax_parse(handler, @io)
56
54
 
@@ -17,7 +17,7 @@ module Xsv
17
17
  return rows, cols
18
18
  end
19
19
 
20
- sheet.rewind
20
+ sheet.rewind if sheet.respond_to?(:rewind)
21
21
  Ox.sax_parse(handler, sheet)
22
22
 
23
23
  return rows, cols
@@ -16,9 +16,7 @@ module Xsv
16
16
  when "e" # N/A
17
17
  nil
18
18
  when nil
19
- if @current_value == ""
20
- nil
21
- elsif @current_cell[:s]
19
+ if @current_cell[:s]
22
20
  style = @workbook.xfs[@current_cell[:s].to_i]
23
21
  numFmt = @workbook.numFmts[style[:numFmtId].to_i]
24
22
 
@@ -50,7 +48,7 @@ module Xsv
50
48
  @current_row = {}
51
49
  @current_row_attrs = {}
52
50
  @current_cell = {}
53
- @current_value = nil
51
+ @current_value = String.new
54
52
  @last_row = last_row
55
53
 
56
54
  if @mode == :hash
@@ -62,14 +60,14 @@ module Xsv
62
60
  case name
63
61
  when :c
64
62
  @state = name
65
- @current_cell = {}
66
- @current_value = String.new
63
+ @current_cell.clear
64
+ @current_value.clear
67
65
  when :v
68
66
  @state = name
69
67
  when :row
70
68
  @state = name
71
69
  @current_row = @empty_row.dup
72
- @current_row_attrs = {}
70
+ @current_row_attrs.clear
73
71
  else
74
72
  @state = nil
75
73
  end
@@ -11,7 +11,7 @@ module Xsv
11
11
  @numFmts = numFmts
12
12
  end
13
13
 
14
- Ox.sax_parse(handler, io)
14
+ Ox.sax_parse(handler, io.read)
15
15
  return @xfs, @numFmts
16
16
  end
17
17
 
data/lib/xsv/version.rb CHANGED
@@ -1,4 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
  module Xsv
3
- VERSION = "0.3.5"
3
+ VERSION = "0.3.6"
4
4
  end
data/lib/xsv/workbook.rb CHANGED
@@ -80,7 +80,15 @@ module Xsv
80
80
  @zip.glob("xl/worksheets/sheet*.xml").sort do |a, b|
81
81
  a.name[/\d+/].to_i <=> b.name[/\d+/].to_i
82
82
  end.each do |entry|
83
- @sheets << Xsv::Sheet.new(self, entry.get_input_stream)
83
+ # For smaller sheets, memory performance is a lot better if Ox is
84
+ # handed a string. For larger sheets this leads to awful performance.
85
+ # This is probably caused by either something in SheetRowsHandler or
86
+ # the interaction between Zip::InputStream and Ox
87
+ if entry.size > 100_000_000
88
+ @sheets << Xsv::Sheet.new(self, entry.get_input_stream)
89
+ else
90
+ @sheets << Xsv::Sheet.new(self, entry.get_input_stream.read)
91
+ end
84
92
  end
85
93
  end
86
94
  end
data/xsv.gemspec CHANGED
@@ -11,7 +11,7 @@ Gem::Specification.new do |spec|
11
11
 
12
12
  spec.summary = "A fast and lightweight xlsx parser that provides nothing a CSV parser wouldn't"
13
13
  spec.description = <<-EOF
14
- XSV is a fast, lightweight parser for Office Open XML spreadsheet files
14
+ Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
15
15
  (commonly known as Excel or .xlsx files). It strives to be minimal in the
16
16
  sense that it provides nothing a CSV reader wouldn't, meaning it only
17
17
  deals with minimal formatting and cannot create or modify documents.
@@ -42,7 +42,7 @@ Gem::Specification.new do |spec|
42
42
  spec.add_dependency "rubyzip", "~> 2.2"
43
43
  spec.add_dependency "ox", "~> 2.13"
44
44
 
45
- spec.add_development_dependency "bundler", "~> 1.17"
45
+ spec.add_development_dependency "bundler", "< 3"
46
46
  spec.add_development_dependency "rake", "~> 13.0"
47
47
  spec.add_development_dependency "minitest", "~> 5.0"
48
48
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: xsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.5
4
+ version: 0.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martijn Storck
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-03-02 00:00:00.000000000 Z
11
+ date: 2020-03-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip
@@ -42,16 +42,16 @@ dependencies:
42
42
  name: bundler
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - "~>"
45
+ - - "<"
46
46
  - !ruby/object:Gem::Version
47
- version: '1.17'
47
+ version: '3'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - "~>"
52
+ - - "<"
53
53
  - !ruby/object:Gem::Version
54
- version: '1.17'
54
+ version: '3'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: rake
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -81,7 +81,7 @@ dependencies:
81
81
  - !ruby/object:Gem::Version
82
82
  version: '5.0'
83
83
  description: |2
84
- XSV is a fast, lightweight parser for Office Open XML spreadsheet files
84
+ Xsv is a fast, lightweight parser for Office Open XML spreadsheet files
85
85
  (commonly known as Excel or .xlsx files). It strives to be minimal in the
86
86
  sense that it provides nothing a CSV reader wouldn't, meaning it only
87
87
  deals with minimal formatting and cannot create or modify documents.