saxlsx 0.3.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -3
- data/Gemfile +5 -0
- data/README.md +19 -1
- data/Rakefile +68 -0
- data/lib/saxlsx/file_system.rb +3 -2
- data/lib/saxlsx/rows_collection.rb +3 -3
- data/lib/saxlsx/rows_collection_parser.rb +4 -5
- data/lib/saxlsx/shared_string_collection_parser.rb +6 -1
- data/lib/saxlsx/sheet.rb +3 -3
- data/lib/saxlsx/sheet_collection.rb +3 -3
- data/lib/saxlsx/sheet_collection_parser.rb +33 -8
- data/lib/saxlsx/version.rb +1 -1
- data/lib/saxlsx/workbook.rb +8 -0
- data/spec/data/Spec1904.xlsx +0 -0
- data/spec/sheet_spec.rb +14 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 48288886c8097438a2f3527b42c31f0db2a63c34
|
4
|
+
data.tar.gz: 3386ac7f891cc701d09697e5620d4d5aafee216b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e8ca7c015a5b34310b2008f70ea9eec36abdcae99dd4fd29ba70711de286f8dfb37a1cbf4ff7e295b98bd1a2d2cdb700f9b83f9cd790df16d9818a943792c005
|
7
|
+
data.tar.gz: 7dea47237742b543733e9aab3ffdd2c8d11cb9a8a7826e63b244dd20b782dc5661c16bccbef23485cec81fb9855dd1069799d351f67555f35dcb0509a8f30763
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,12 @@
|
|
2
2
|
|
3
3
|
[Build Status](https://travis-ci.org/mak-it/saxlsx)
|
4
4
|
|
5
|
-
Fast XLSX reader on top of Ox SAX parser.
|
5
|
+
**Fast** and memory efficient XLSX reader on top of Ox SAX parser.
|
6
|
+
|
7
|
+
It reads row by row and doesn't store the whole sheet in memory, so this
|
8
|
+
approach is more suitable when parsing big files. This also means that functions
|
9
|
+
and references will not work, as this style of parsing doesn't know
|
10
|
+
anything about other rows.
|
6
11
|
|
7
12
|
## Installation
|
8
13
|
|
@@ -36,6 +41,19 @@ Saxlsx::Workbook.open filename do |w|
|
|
36
41
|
end
|
37
42
|
```
|
38
43
|
|
44
|
+
## How fast is it?
|
45
|
+
|
46
|
+
```bash
|
47
|
+
$ rake bench
|
48
|
+
```
|
49
|
+
|
50
|
+
```
|
51
|
+
creek 2.610000 0.060000 2.670000 ( 2.704594)
|
52
|
+
rubyXL 3.830000 0.130000 3.960000 ( 3.985651)
|
53
|
+
saxlsx 0.750000 0.010000 0.760000 ( 0.785445)
|
54
|
+
simple_xlsx_reader 1.870000 0.040000 1.910000 ( 1.940999)
|
55
|
+
```
|
56
|
+
|
39
57
|
## Contributing
|
40
58
|
|
41
59
|
1. Fork it
|
data/Rakefile
CHANGED
@@ -4,3 +4,71 @@ require "rspec/core/rake_task"
|
|
4
4
|
Bundler::GemHelper.install_tasks
|
5
5
|
RSpec::Core::RakeTask.new(:spec)
|
6
6
|
task :default => :spec
|
7
|
+
|
8
|
+
|
9
|
+
task :bench do
|
10
|
+
require 'benchmark'
|
11
|
+
require 'axlsx'
|
12
|
+
require 'saxlsx'
|
13
|
+
require 'rubyXL'
|
14
|
+
require 'simple_xlsx_reader'
|
15
|
+
require 'creek'
|
16
|
+
|
17
|
+
path = "tmp/bench.xlsx"
|
18
|
+
unless File.exists?(path)
|
19
|
+
puts "* Generating #{path}"
|
20
|
+
FileUtils.mkdir_p File.dirname(path)
|
21
|
+
Axlsx::Package.new do |p|
|
22
|
+
money_style = p.workbook.styles.add_style(
|
23
|
+
num_fmt: 5, format_code: "€0.000"
|
24
|
+
)
|
25
|
+
p.workbook.add_worksheet(:name => "Pie Chart") do |sheet|
|
26
|
+
10000.times do
|
27
|
+
sheet.add_row(
|
28
|
+
[Date.today, Time.now, 1000, 3.14, "Long" * 100],
|
29
|
+
types: [:date, :time, :integer, :float, :string],
|
30
|
+
style: [nil, nil, nil, money_style, nil]
|
31
|
+
)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
p.use_shared_strings = true
|
35
|
+
p.serialize(path)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
Benchmark.benchmark('', 20) do |x|
|
40
|
+
x.report "creek" do
|
41
|
+
w = Creek::Book.new path
|
42
|
+
w.sheets.each do |s|
|
43
|
+
s.rows.each do |r|
|
44
|
+
r.values.inspect
|
45
|
+
end
|
46
|
+
end
|
47
|
+
end
|
48
|
+
x.report "rubyXL" do
|
49
|
+
w = RubyXL::Parser.parse path
|
50
|
+
w.worksheets.each do |s|
|
51
|
+
s.each do |r|
|
52
|
+
r.cells.map(&:value).inspect
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
x.report "saxlsx" do
|
57
|
+
Saxlsx::Workbook.open path do |w|
|
58
|
+
w.sheets.each do |s|
|
59
|
+
s.rows.each do |r|
|
60
|
+
r.to_a.inspect
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
x.report "simple_xlsx_reader" do
|
66
|
+
w = SimpleXlsxReader.open path
|
67
|
+
w.sheets.each do |s|
|
68
|
+
s.rows.each do |r|
|
69
|
+
r.to_a.inspect
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
data/lib/saxlsx/file_system.rb
CHANGED
@@ -19,11 +19,12 @@ module Saxlsx
|
|
19
19
|
end
|
20
20
|
|
21
21
|
def workbook
|
22
|
-
@zip.read('xl/workbook.xml')
|
22
|
+
@zip.read('xl/workbook.xml')
|
23
23
|
end
|
24
24
|
|
25
25
|
def shared_strings
|
26
|
-
@zip.
|
26
|
+
file = @zip.glob('xl/shared[Ss]trings.xml').first
|
27
|
+
@zip.read(file) if file
|
27
28
|
end
|
28
29
|
|
29
30
|
def styles
|
data/lib/saxlsx/rows_collection.rb
CHANGED
@@ -3,15 +3,15 @@ module Saxlsx
|
|
3
3
|
|
4
4
|
include Enumerable
|
5
5
|
|
6
|
-
def initialize(index, file_system,
|
6
|
+
def initialize(index, file_system, workbook)
|
7
7
|
@index = index
|
8
8
|
@file_system = file_system
|
9
|
-
@
|
9
|
+
@workbook = workbook
|
10
10
|
@sheet = file_system.sheet(index)
|
11
11
|
end
|
12
12
|
|
13
13
|
def each(&block)
|
14
|
-
RowsCollectionParser.parse @index, @sheet, @
|
14
|
+
RowsCollectionParser.parse @index, @sheet, @workbook, &block
|
15
15
|
end
|
16
16
|
|
17
17
|
def count
|
data/lib/saxlsx/rows_collection_parser.rb
CHANGED
@@ -35,13 +35,12 @@ module Saxlsx
|
|
35
35
|
49 => :unsupported # @
|
36
36
|
}
|
37
37
|
|
38
|
-
DATE_SYSTEM_1900 = DateTime.new(1899, 12, 30)
|
39
|
-
|
40
38
|
def self.parse(index, data, workbook, &block)
|
41
39
|
SaxParser.parse self.new(workbook, &block), data
|
42
40
|
end
|
43
41
|
|
44
42
|
def initialize(workbook, &block)
|
43
|
+
@base_date = workbook.base_date
|
45
44
|
@shared_strings = workbook.shared_strings
|
46
45
|
@number_formats = workbook.number_formats
|
47
46
|
@block = block
|
@@ -97,16 +96,16 @@ module Saxlsx
|
|
97
96
|
def value_of(text)
|
98
97
|
case @current_type
|
99
98
|
when 's'
|
100
|
-
@shared_strings[text.to_i]
|
99
|
+
@shared_strings[text.to_i] || text
|
101
100
|
when 'b'
|
102
101
|
BooleanParser.parse text
|
103
102
|
else
|
104
103
|
case @current_number_format
|
105
104
|
when :date
|
106
|
-
|
105
|
+
@base_date + text.to_i
|
107
106
|
when :date_time
|
108
107
|
# Round time to seconds
|
109
|
-
date =
|
108
|
+
date = @base_date + (text.to_f * 86400).round.fdiv(86400)
|
110
109
|
DateTime.new(date.year, date.month, date.day, date.hour, date.minute, date.second)
|
111
110
|
when :fixnum
|
112
111
|
text.to_i
|
data/lib/saxlsx/shared_string_collection_parser.rb
CHANGED
@@ -2,7 +2,12 @@ module Saxlsx
|
|
2
2
|
class SharedStringCollectionParser < Ox::Sax
|
3
3
|
|
4
4
|
def self.parse(file_system, &block)
|
5
|
-
|
5
|
+
shared_strings = file_system.shared_strings
|
6
|
+
if shared_strings
|
7
|
+
SaxParser.parse self.new(&block), shared_strings
|
8
|
+
else
|
9
|
+
[]
|
10
|
+
end
|
6
11
|
end
|
7
12
|
|
8
13
|
def initialize(&block)
|
data/lib/saxlsx/sheet.rb
CHANGED
@@ -3,15 +3,15 @@ module Saxlsx
|
|
3
3
|
|
4
4
|
attr_reader :name
|
5
5
|
|
6
|
-
def initialize(name, index, file_system,
|
6
|
+
def initialize(name, index, file_system, workbook)
|
7
7
|
@name = name
|
8
8
|
@index = index
|
9
9
|
@file_system = file_system
|
10
|
-
@
|
10
|
+
@workbook = workbook
|
11
11
|
end
|
12
12
|
|
13
13
|
def rows
|
14
|
-
@rows ||= RowsCollection.new(@index, @file_system, @
|
14
|
+
@rows ||= RowsCollection.new(@index, @file_system, @workbook)
|
15
15
|
end
|
16
16
|
|
17
17
|
def to_csv(path)
|
data/lib/saxlsx/sheet_collection.rb
CHANGED
@@ -3,13 +3,13 @@ module Saxlsx
|
|
3
3
|
|
4
4
|
include Enumerable
|
5
5
|
|
6
|
-
def initialize(file_system,
|
6
|
+
def initialize(file_system, workbook)
|
7
7
|
@file_system = file_system
|
8
|
-
@
|
8
|
+
@workbook = workbook
|
9
9
|
end
|
10
10
|
|
11
11
|
def each(&block)
|
12
|
-
SheetCollectionParser.parse @file_system, @
|
12
|
+
SheetCollectionParser.parse @file_system, @workbook, &block
|
13
13
|
end
|
14
14
|
|
15
15
|
end
|
data/lib/saxlsx/sheet_collection_parser.rb
CHANGED
@@ -3,30 +3,55 @@ module Saxlsx
|
|
3
3
|
|
4
4
|
CurrentSheet = Struct.new :index, :name
|
5
5
|
|
6
|
-
def self.parse(file_system,
|
7
|
-
SaxParser.parse
|
6
|
+
def self.parse(file_system, workbook, &block)
|
7
|
+
SaxParser.parse(
|
8
|
+
self.new(file_system, workbook, &block),
|
9
|
+
file_system.workbook
|
10
|
+
)
|
8
11
|
end
|
9
12
|
|
10
|
-
def initialize(file_system,
|
13
|
+
def initialize(file_system, workbook, &block)
|
11
14
|
@file_system = file_system
|
12
|
-
@
|
15
|
+
@workbook = workbook
|
13
16
|
@block = block
|
14
17
|
@index = -1
|
18
|
+
@workbook_pr = false
|
15
19
|
end
|
16
20
|
|
17
21
|
def start_element(name)
|
18
|
-
|
22
|
+
case name
|
23
|
+
when :sheet
|
24
|
+
@current_sheet = CurrentSheet.new(@index += 1)
|
25
|
+
when :workbookPr
|
26
|
+
@workbook_pr = true
|
27
|
+
end
|
19
28
|
end
|
20
29
|
|
21
30
|
def end_element(name)
|
22
|
-
|
23
|
-
|
31
|
+
case name
|
32
|
+
when :sheet
|
33
|
+
@block.call Sheet.new(
|
34
|
+
@current_sheet.name,
|
35
|
+
@current_sheet.index,
|
36
|
+
@file_system,
|
37
|
+
@workbook
|
38
|
+
)
|
24
39
|
@current_sheet = nil
|
40
|
+
when :workbookPr
|
41
|
+
@workbook_pr = false
|
25
42
|
end
|
26
43
|
end
|
27
44
|
|
28
45
|
def attr(name, value)
|
29
|
-
|
46
|
+
if @current_sheet
|
47
|
+
if name == :name
|
48
|
+
@current_sheet.name = value
|
49
|
+
end
|
50
|
+
elsif @workbook_pr
|
51
|
+
if name == :date1904 && value =~ /true|1/i
|
52
|
+
@workbook.date1904 = true
|
53
|
+
end
|
54
|
+
end
|
30
55
|
end
|
31
56
|
|
32
57
|
end
|
data/lib/saxlsx/version.rb
CHANGED
data/lib/saxlsx/workbook.rb
CHANGED
@@ -1,5 +1,9 @@
|
|
1
1
|
module Saxlsx
|
2
2
|
class Workbook
|
3
|
+
DATE_SYSTEM_1900 = DateTime.new(1899, 12, 30)
|
4
|
+
DATE_SYSTEM_1904 = DateTime.new(1904, 1, 1)
|
5
|
+
|
6
|
+
attr_accessor :date1904
|
3
7
|
|
4
8
|
def self.open(filename)
|
5
9
|
begin
|
@@ -35,6 +39,10 @@ module Saxlsx
|
|
35
39
|
@number_formats ||= StyleCollection.new(@file_system).to_a
|
36
40
|
end
|
37
41
|
|
42
|
+
def base_date
|
43
|
+
@base_date ||= date1904 ? DATE_SYSTEM_1904 : DATE_SYSTEM_1900
|
44
|
+
end
|
45
|
+
|
38
46
|
def to_csv(path)
|
39
47
|
sheets.each { |s| s.to_csv path }
|
40
48
|
end
|
data/spec/data/Spec1904.xlsx
ADDED
Binary file
|
data/spec/sheet_spec.rb
CHANGED
@@ -90,4 +90,18 @@ describe Sheet do
|
|
90
90
|
end
|
91
91
|
end
|
92
92
|
|
93
|
+
context 'with 1904 date system' do
|
94
|
+
let(:filename) { "#{File.dirname(__FILE__)}/data/Spec1904.xlsx" }
|
95
|
+
|
96
|
+
it 'should use 1904 date system when converting dates' do
|
97
|
+
Workbook.open filename do |w|
|
98
|
+
w.sheets[0].tap do |s|
|
99
|
+
s.rows[0].should eq [
|
100
|
+
DateTime.new(1970, 1, 1, 1, 0, 0),
|
101
|
+
DateTime.new(1970, 1, 1)
|
102
|
+
]
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
end
|
93
107
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: saxlsx
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Edgars Beigarts
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rubyzip
|
@@ -126,6 +126,7 @@ files:
|
|
126
126
|
- saxlsx.gemspec
|
127
127
|
- spec/column_name_generator_spec.rb
|
128
128
|
- spec/data/Spec.xlsx
|
129
|
+
- spec/data/Spec1904.xlsx
|
129
130
|
- spec/sheet_spec.rb
|
130
131
|
- spec/spec_helper.rb
|
131
132
|
- spec/workbook_spec.rb
|
@@ -156,6 +157,7 @@ summary: Fast xlsx reader on top of Ox SAX parser
|
|
156
157
|
test_files:
|
157
158
|
- spec/column_name_generator_spec.rb
|
158
159
|
- spec/data/Spec.xlsx
|
160
|
+
- spec/data/Spec1904.xlsx
|
159
161
|
- spec/sheet_spec.rb
|
160
162
|
- spec/spec_helper.rb
|
161
163
|
- spec/workbook_spec.rb
|