saxlsx 0.3.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -3
- data/Gemfile +5 -0
- data/README.md +19 -1
- data/Rakefile +68 -0
- data/lib/saxlsx/file_system.rb +3 -2
- data/lib/saxlsx/rows_collection.rb +3 -3
- data/lib/saxlsx/rows_collection_parser.rb +4 -5
- data/lib/saxlsx/shared_string_collection_parser.rb +6 -1
- data/lib/saxlsx/sheet.rb +3 -3
- data/lib/saxlsx/sheet_collection.rb +3 -3
- data/lib/saxlsx/sheet_collection_parser.rb +33 -8
- data/lib/saxlsx/version.rb +1 -1
- data/lib/saxlsx/workbook.rb +8 -0
- data/spec/data/Spec1904.xlsx +0 -0
- data/spec/sheet_spec.rb +14 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 48288886c8097438a2f3527b42c31f0db2a63c34
|
4
|
+
data.tar.gz: 3386ac7f891cc701d09697e5620d4d5aafee216b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e8ca7c015a5b34310b2008f70ea9eec36abdcae99dd4fd29ba70711de286f8dfb37a1cbf4ff7e295b98bd1a2d2cdb700f9b83f9cd790df16d9818a943792c005
|
7
|
+
data.tar.gz: 7dea47237742b543733e9aab3ffdd2c8d11cb9a8a7826e63b244dd20b782dc5661c16bccbef23485cec81fb9855dd1069799d351f67555f35dcb0509a8f30763
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,12 @@
|
|
2
2
|
|
3
3
|
[![Build Status](https://travis-ci.org/mak-it/saxlsx.png?branch=master)](https://travis-ci.org/mak-it/saxlsx)
|
4
4
|
|
5
|
-
Fast XLSX reader on top of Ox SAX parser.
|
5
|
+
**Fast** and memory efficient XLSX reader on top of Ox SAX parser.
|
6
|
+
|
7
|
+
It reads row by row and doesn't store the whole sheet in memory, so this
|
8
|
+
approach is more suitable when parsing big files. This also means that functions
|
9
|
+
and references will not work, as this style of parsing doesn't know
|
10
|
+
anything about other rows.
|
6
11
|
|
7
12
|
## Installation
|
8
13
|
|
@@ -36,6 +41,19 @@ Saxlsx::Workbook.open filename do |w|
|
|
36
41
|
end
|
37
42
|
```
|
38
43
|
|
44
|
+
## How fast is it?
|
45
|
+
|
46
|
+
```bash
|
47
|
+
$ rake bench
|
48
|
+
```
|
49
|
+
|
50
|
+
```
|
51
|
+
creek 2.610000 0.060000 2.670000 ( 2.704594)
|
52
|
+
rubyXL 3.830000 0.130000 3.960000 ( 3.985651)
|
53
|
+
saxlsx 0.750000 0.010000 0.760000 ( 0.785445)
|
54
|
+
simple_xlsx_reader 1.870000 0.040000 1.910000 ( 1.940999)
|
55
|
+
```
|
56
|
+
|
39
57
|
## Contributing
|
40
58
|
|
41
59
|
1. Fork it
|
data/Rakefile
CHANGED
@@ -4,3 +4,71 @@ require "rspec/core/rake_task"
|
|
4
4
|
Bundler::GemHelper.install_tasks
|
5
5
|
RSpec::Core::RakeTask.new(:spec)
|
6
6
|
task :default => :spec
|
7
|
+
|
8
|
+
|
9
|
+
task :bench do
|
10
|
+
require 'benchmark'
|
11
|
+
require 'axlsx'
|
12
|
+
require 'saxlsx'
|
13
|
+
require 'rubyXL'
|
14
|
+
require 'simple_xlsx_reader'
|
15
|
+
require 'creek'
|
16
|
+
|
17
|
+
path = "tmp/bench.xlsx"
|
18
|
+
unless File.exists?(path)
|
19
|
+
puts "* Generating #{path}"
|
20
|
+
FileUtils.mkdir_p File.dirname(path)
|
21
|
+
Axlsx::Package.new do |p|
|
22
|
+
money_style = p.workbook.styles.add_style(
|
23
|
+
num_fmt: 5, format_code: "€0.000"
|
24
|
+
)
|
25
|
+
p.workbook.add_worksheet(:name => "Pie Chart") do |sheet|
|
26
|
+
10000.times do
|
27
|
+
sheet.add_row(
|
28
|
+
[Date.today, Time.now, 1000, 3.14, "Long" * 100],
|
29
|
+
types: [:date, :time, :integer, :float, :string],
|
30
|
+
style: [nil, nil, nil, money_style, nil]
|
31
|
+
)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
p.use_shared_strings = true
|
35
|
+
p.serialize(path)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
Benchmark.benchmark('', 20) do |x|
|
40
|
+
x.report "creek" do
|
41
|
+
w = Creek::Book.new path
|
42
|
+
w.sheets.each do |s|
|
43
|
+
s.rows.each do |r|
|
44
|
+
r.values.inspect
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
x.report "rubyXL" do
|
49
|
+
w = RubyXL::Parser.parse path
|
50
|
+
w.worksheets.each do |s|
|
51
|
+
s.each do |r|
|
52
|
+
r.cells.map(&:value).inspect
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
x.report "saxlsx" do
|
57
|
+
Saxlsx::Workbook.open path do |w|
|
58
|
+
w.sheets.each do |s|
|
59
|
+
s.rows.each do |r|
|
60
|
+
r.to_a.inspect
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
x.report "simple_xlsx_reader" do
|
66
|
+
w = SimpleXlsxReader.open path
|
67
|
+
w.sheets.each do |s|
|
68
|
+
s.rows.each do |r|
|
69
|
+
r.to_a.inspect
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
data/lib/saxlsx/file_system.rb
CHANGED
@@ -19,11 +19,12 @@ module Saxlsx
|
|
19
19
|
end
|
20
20
|
|
21
21
|
def workbook
|
22
|
-
@zip.read('xl/workbook.xml')
|
22
|
+
@zip.read('xl/workbook.xml')
|
23
23
|
end
|
24
24
|
|
25
25
|
def shared_strings
|
26
|
-
@zip.
|
26
|
+
file = @zip.glob('xl/shared[Ss]trings.xml').first
|
27
|
+
@zip.read(file) if file
|
27
28
|
end
|
28
29
|
|
29
30
|
def styles
|
@@ -3,15 +3,15 @@ module Saxlsx
|
|
3
3
|
|
4
4
|
include Enumerable
|
5
5
|
|
6
|
-
def initialize(index, file_system,
|
6
|
+
def initialize(index, file_system, workbook)
|
7
7
|
@index = index
|
8
8
|
@file_system = file_system
|
9
|
-
@
|
9
|
+
@workbook = workbook
|
10
10
|
@sheet = file_system.sheet(index)
|
11
11
|
end
|
12
12
|
|
13
13
|
def each(&block)
|
14
|
-
RowsCollectionParser.parse @index, @sheet, @
|
14
|
+
RowsCollectionParser.parse @index, @sheet, @workbook, &block
|
15
15
|
end
|
16
16
|
|
17
17
|
def count
|
@@ -35,13 +35,12 @@ module Saxlsx
|
|
35
35
|
49 => :unsupported # @
|
36
36
|
}
|
37
37
|
|
38
|
-
DATE_SYSTEM_1900 = DateTime.new(1899, 12, 30)
|
39
|
-
|
40
38
|
def self.parse(index, data, workbook, &block)
|
41
39
|
SaxParser.parse self.new(workbook, &block), data
|
42
40
|
end
|
43
41
|
|
44
42
|
def initialize(workbook, &block)
|
43
|
+
@base_date = workbook.base_date
|
45
44
|
@shared_strings = workbook.shared_strings
|
46
45
|
@number_formats = workbook.number_formats
|
47
46
|
@block = block
|
@@ -97,16 +96,16 @@ module Saxlsx
|
|
97
96
|
def value_of(text)
|
98
97
|
case @current_type
|
99
98
|
when 's'
|
100
|
-
@shared_strings[text.to_i]
|
99
|
+
@shared_strings[text.to_i] || text
|
101
100
|
when 'b'
|
102
101
|
BooleanParser.parse text
|
103
102
|
else
|
104
103
|
case @current_number_format
|
105
104
|
when :date
|
106
|
-
|
105
|
+
@base_date + text.to_i
|
107
106
|
when :date_time
|
108
107
|
# Round time to seconds
|
109
|
-
date =
|
108
|
+
date = @base_date + (text.to_f * 86400).round.fdiv(86400)
|
110
109
|
DateTime.new(date.year, date.month, date.day, date.hour, date.minute, date.second)
|
111
110
|
when :fixnum
|
112
111
|
text.to_i
|
@@ -2,7 +2,12 @@ module Saxlsx
|
|
2
2
|
class SharedStringCollectionParser < Ox::Sax
|
3
3
|
|
4
4
|
def self.parse(file_system, &block)
|
5
|
-
|
5
|
+
shared_strings = file_system.shared_strings
|
6
|
+
if shared_strings
|
7
|
+
SaxParser.parse self.new(&block), shared_strings
|
8
|
+
else
|
9
|
+
[]
|
10
|
+
end
|
6
11
|
end
|
7
12
|
|
8
13
|
def initialize(&block)
|
data/lib/saxlsx/sheet.rb
CHANGED
@@ -3,15 +3,15 @@ module Saxlsx
|
|
3
3
|
|
4
4
|
attr_reader :name
|
5
5
|
|
6
|
-
def initialize(name, index, file_system,
|
6
|
+
def initialize(name, index, file_system, workbook)
|
7
7
|
@name = name
|
8
8
|
@index = index
|
9
9
|
@file_system = file_system
|
10
|
-
@
|
10
|
+
@workbook = workbook
|
11
11
|
end
|
12
12
|
|
13
13
|
def rows
|
14
|
-
@rows ||= RowsCollection.new(@index, @file_system, @
|
14
|
+
@rows ||= RowsCollection.new(@index, @file_system, @workbook)
|
15
15
|
end
|
16
16
|
|
17
17
|
def to_csv(path)
|
@@ -3,13 +3,13 @@ module Saxlsx
|
|
3
3
|
|
4
4
|
include Enumerable
|
5
5
|
|
6
|
-
def initialize(file_system,
|
6
|
+
def initialize(file_system, workbook)
|
7
7
|
@file_system = file_system
|
8
|
-
@
|
8
|
+
@workbook = workbook
|
9
9
|
end
|
10
10
|
|
11
11
|
def each(&block)
|
12
|
-
SheetCollectionParser.parse @file_system, @
|
12
|
+
SheetCollectionParser.parse @file_system, @workbook, &block
|
13
13
|
end
|
14
14
|
|
15
15
|
end
|
@@ -3,30 +3,55 @@ module Saxlsx
|
|
3
3
|
|
4
4
|
CurrentSheet = Struct.new :index, :name
|
5
5
|
|
6
|
-
def self.parse(file_system,
|
7
|
-
SaxParser.parse
|
6
|
+
def self.parse(file_system, workbook, &block)
|
7
|
+
SaxParser.parse(
|
8
|
+
self.new(file_system, workbook, &block),
|
9
|
+
file_system.workbook
|
10
|
+
)
|
8
11
|
end
|
9
12
|
|
10
|
-
def initialize(file_system,
|
13
|
+
def initialize(file_system, workbook, &block)
|
11
14
|
@file_system = file_system
|
12
|
-
@
|
15
|
+
@workbook = workbook
|
13
16
|
@block = block
|
14
17
|
@index = -1
|
18
|
+
@workbook_pr = false
|
15
19
|
end
|
16
20
|
|
17
21
|
def start_element(name)
|
18
|
-
|
22
|
+
case name
|
23
|
+
when :sheet
|
24
|
+
@current_sheet = CurrentSheet.new(@index += 1)
|
25
|
+
when :workbookPr
|
26
|
+
@workbook_pr = true
|
27
|
+
end
|
19
28
|
end
|
20
29
|
|
21
30
|
def end_element(name)
|
22
|
-
|
23
|
-
|
31
|
+
case name
|
32
|
+
when :sheet
|
33
|
+
@block.call Sheet.new(
|
34
|
+
@current_sheet.name,
|
35
|
+
@current_sheet.index,
|
36
|
+
@file_system,
|
37
|
+
@workbook
|
38
|
+
)
|
24
39
|
@current_sheet = nil
|
40
|
+
when :workbookPr
|
41
|
+
@workbook_pr = false
|
25
42
|
end
|
26
43
|
end
|
27
44
|
|
28
45
|
def attr(name, value)
|
29
|
-
|
46
|
+
if @current_sheet
|
47
|
+
if name == :name
|
48
|
+
@current_sheet.name = value
|
49
|
+
end
|
50
|
+
elsif @workbook_pr
|
51
|
+
if name == :date1904 && value =~ /true|1/i
|
52
|
+
@workbook.date1904 = true
|
53
|
+
end
|
54
|
+
end
|
30
55
|
end
|
31
56
|
|
32
57
|
end
|
data/lib/saxlsx/version.rb
CHANGED
data/lib/saxlsx/workbook.rb
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
module Saxlsx
|
2
2
|
class Workbook
|
3
|
+
DATE_SYSTEM_1900 = DateTime.new(1899, 12, 30)
|
4
|
+
DATE_SYSTEM_1904 = DateTime.new(1904, 1, 1)
|
5
|
+
|
6
|
+
attr_accessor :date1904
|
3
7
|
|
4
8
|
def self.open(filename)
|
5
9
|
begin
|
@@ -35,6 +39,10 @@ module Saxlsx
|
|
35
39
|
@number_formats ||= StyleCollection.new(@file_system).to_a
|
36
40
|
end
|
37
41
|
|
42
|
+
def base_date
|
43
|
+
@base_date ||= date1904 ? DATE_SYSTEM_1904 : DATE_SYSTEM_1900
|
44
|
+
end
|
45
|
+
|
38
46
|
def to_csv(path)
|
39
47
|
sheets.each { |s| s.to_csv path }
|
40
48
|
end
|
Binary file
|
data/spec/sheet_spec.rb
CHANGED
@@ -90,4 +90,18 @@ describe Sheet do
|
|
90
90
|
end
|
91
91
|
end
|
92
92
|
|
93
|
+
context 'with 1904 date system' do
|
94
|
+
let(:filename) { "#{File.dirname(__FILE__)}/data/Spec1904.xlsx" }
|
95
|
+
|
96
|
+
it 'should use 1904 date system when converting dates' do
|
97
|
+
Workbook.open filename do |w|
|
98
|
+
w.sheets[0].tap do |s|
|
99
|
+
s.rows[0].should eq [
|
100
|
+
DateTime.new(1970, 1, 1, 1, 0, 0),
|
101
|
+
DateTime.new(1970, 1, 1)
|
102
|
+
]
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
93
107
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxlsx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Edgars Beigarts
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rubyzip
|
@@ -126,6 +126,7 @@ files:
|
|
126
126
|
- saxlsx.gemspec
|
127
127
|
- spec/column_name_generator_spec.rb
|
128
128
|
- spec/data/Spec.xlsx
|
129
|
+
- spec/data/Spec1904.xlsx
|
129
130
|
- spec/sheet_spec.rb
|
130
131
|
- spec/spec_helper.rb
|
131
132
|
- spec/workbook_spec.rb
|
@@ -156,6 +157,7 @@ summary: Fast xlsx reader on top of Ox SAX parser
|
|
156
157
|
test_files:
|
157
158
|
- spec/column_name_generator_spec.rb
|
158
159
|
- spec/data/Spec.xlsx
|
160
|
+
- spec/data/Spec1904.xlsx
|
159
161
|
- spec/sheet_spec.rb
|
160
162
|
- spec/spec_helper.rb
|
161
163
|
- spec/workbook_spec.rb
|