saxlsx 0.3.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5b0cbd91196c90e22a5dc4ede2bf372792cc5e3f
4
- data.tar.gz: 96bec50a0662ab5741c54e329c80c073c9c202e2
3
+ metadata.gz: 48288886c8097438a2f3527b42c31f0db2a63c34
4
+ data.tar.gz: 3386ac7f891cc701d09697e5620d4d5aafee216b
5
5
  SHA512:
6
- metadata.gz: e55094b6131ae28e50afb8bed91d91f15b200596f94cb6215f9bea6c5e9303d158f281ec6495d8cbb481528f3893ea614748d3fd3c9130b06555df6d98930e10
7
- data.tar.gz: fbd741be1e3b91784bc11502351a4195d31d0459de3765bd7de6acc64cebf4b5580a9eb0fc19bab0ad844f27ab316c84085ff9b4155bf76d52e5cef2f2d05738
6
+ metadata.gz: e8ca7c015a5b34310b2008f70ea9eec36abdcae99dd4fd29ba70711de286f8dfb37a1cbf4ff7e295b98bd1a2d2cdb700f9b83f9cd790df16d9818a943792c005
7
+ data.tar.gz: 7dea47237742b543733e9aab3ffdd2c8d11cb9a8a7826e63b244dd20b782dc5661c16bccbef23485cec81fb9855dd1069799d351f67555f35dcb0509a8f30763
@@ -1,5 +1,5 @@
1
1
  rvm:
2
- - 2.0.0
3
- - 2.1.0
4
- - rbx
2
+ - 2.0
3
+ - 2.1
4
+ - 2.2
5
5
  - ruby-head
data/Gemfile CHANGED
@@ -2,3 +2,8 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in saxlsx.gemspec
4
4
  gemspec
5
+
6
+ gem 'axlsx', '~> 2.1.0.pre'
7
+ gem 'rubyXL'
8
+ gem 'simple_xlsx_reader'
9
+ gem 'creek'
data/README.md CHANGED
@@ -2,7 +2,12 @@
2
2
 
3
3
  [![Build Status](https://travis-ci.org/mak-it/saxlsx.png?branch=master)](https://travis-ci.org/mak-it/saxlsx)
4
4
 
5
- Fast XLSX reader on top of Ox SAX parser.
5
+ **Fast** and memory-efficient XLSX reader built on top of the Ox SAX parser.
6
+
7
+ It reads row by row and doesn't store the whole sheet in memory, so this
8
+ approach is more suitable when parsing big files. This also means that functions
9
+ and references will not work, as this style of parsing doesn't know
10
+ anything about other rows.
6
11
 
7
12
  ## Installation
8
13
 
@@ -36,6 +41,19 @@ Saxlsx::Workbook.open filename do |w|
36
41
  end
37
42
  ```
38
43
 
44
+ ## How fast is it?
45
+
46
+ ```bash
47
+ $ rake bench
48
+ ```
49
+
50
+ ```
51
+ creek 2.610000 0.060000 2.670000 ( 2.704594)
52
+ rubyXL 3.830000 0.130000 3.960000 ( 3.985651)
53
+ saxlsx 0.750000 0.010000 0.760000 ( 0.785445)
54
+ simple_xlsx_reader 1.870000 0.040000 1.910000 ( 1.940999)
55
+ ```
56
+
39
57
  ## Contributing
40
58
 
41
59
  1. Fork it
data/Rakefile CHANGED
@@ -4,3 +4,71 @@ require "rspec/core/rake_task"
4
4
  Bundler::GemHelper.install_tasks
5
5
  RSpec::Core::RakeTask.new(:spec)
6
6
  task :default => :spec
7
+
8
+
9
+ task :bench do
10
+ require 'benchmark'
11
+ require 'axlsx'
12
+ require 'saxlsx'
13
+ require 'rubyXL'
14
+ require 'simple_xlsx_reader'
15
+ require 'creek'
16
+
17
+ path = "tmp/bench.xlsx"
18
+ unless File.exists?(path)
19
+ puts "* Generating #{path}"
20
+ FileUtils.mkdir_p File.dirname(path)
21
+ Axlsx::Package.new do |p|
22
+ money_style = p.workbook.styles.add_style(
23
+ num_fmt: 5, format_code: "€0.000"
24
+ )
25
+ p.workbook.add_worksheet(:name => "Pie Chart") do |sheet|
26
+ 10000.times do
27
+ sheet.add_row(
28
+ [Date.today, Time.now, 1000, 3.14, "Long" * 100],
29
+ types: [:date, :time, :integer, :float, :string],
30
+ style: [nil, nil, nil, money_style, nil]
31
+ )
32
+ end
33
+ end
34
+ p.use_shared_strings = true
35
+ p.serialize(path)
36
+ end
37
+ end
38
+
39
+ Benchmark.benchmark('', 20) do |x|
40
+ x.report "creek" do
41
+ w = Creek::Book.new path
42
+ w.sheets.each do |s|
43
+ s.rows.each do |r|
44
+ r.values.inspect
45
+ end
46
+ end
47
+ end
48
+ x.report "rubyXL" do
49
+ w = RubyXL::Parser.parse path
50
+ w.worksheets.each do |s|
51
+ s.each do |r|
52
+ r.cells.map(&:value).inspect
53
+ end
54
+ end
55
+ end
56
+ x.report "saxlsx" do
57
+ Saxlsx::Workbook.open path do |w|
58
+ w.sheets.each do |s|
59
+ s.rows.each do |r|
60
+ r.to_a.inspect
61
+ end
62
+ end
63
+ end
64
+ end
65
+ x.report "simple_xlsx_reader" do
66
+ w = SimpleXlsxReader.open path
67
+ w.sheets.each do |s|
68
+ s.rows.each do |r|
69
+ r.to_a.inspect
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end
@@ -19,11 +19,12 @@ module Saxlsx
19
19
  end
20
20
 
21
21
  def workbook
22
- @zip.read('xl/workbook.xml').match(/<sheets>.*<\/sheets>/).to_s
22
+ @zip.read('xl/workbook.xml')
23
23
  end
24
24
 
25
25
  def shared_strings
26
- @zip.read('xl/sharedStrings.xml')
26
+ file = @zip.glob('xl/shared[Ss]trings.xml').first
27
+ @zip.read(file) if file
27
28
  end
28
29
 
29
30
  def styles
@@ -3,15 +3,15 @@ module Saxlsx
3
3
 
4
4
  include Enumerable
5
5
 
6
- def initialize(index, file_system, shared_strings)
6
+ def initialize(index, file_system, workbook)
7
7
  @index = index
8
8
  @file_system = file_system
9
- @shared_strings = shared_strings
9
+ @workbook = workbook
10
10
  @sheet = file_system.sheet(index)
11
11
  end
12
12
 
13
13
  def each(&block)
14
- RowsCollectionParser.parse @index, @sheet, @shared_strings, &block
14
+ RowsCollectionParser.parse @index, @sheet, @workbook, &block
15
15
  end
16
16
 
17
17
  def count
@@ -35,13 +35,12 @@ module Saxlsx
35
35
  49 => :unsupported # @
36
36
  }
37
37
 
38
- DATE_SYSTEM_1900 = DateTime.new(1899, 12, 30)
39
-
40
38
  def self.parse(index, data, workbook, &block)
41
39
  SaxParser.parse self.new(workbook, &block), data
42
40
  end
43
41
 
44
42
  def initialize(workbook, &block)
43
+ @base_date = workbook.base_date
45
44
  @shared_strings = workbook.shared_strings
46
45
  @number_formats = workbook.number_formats
47
46
  @block = block
@@ -97,16 +96,16 @@ module Saxlsx
97
96
  def value_of(text)
98
97
  case @current_type
99
98
  when 's'
100
- @shared_strings[text.to_i]
99
+ @shared_strings[text.to_i] || text
101
100
  when 'b'
102
101
  BooleanParser.parse text
103
102
  else
104
103
  case @current_number_format
105
104
  when :date
106
- DATE_SYSTEM_1900 + text.to_i
105
+ @base_date + text.to_i
107
106
  when :date_time
108
107
  # Round time to seconds
109
- date = DATE_SYSTEM_1900 + (text.to_f * 86400).round.fdiv(86400)
108
+ date = @base_date + (text.to_f * 86400).round.fdiv(86400)
110
109
  DateTime.new(date.year, date.month, date.day, date.hour, date.minute, date.second)
111
110
  when :fixnum
112
111
  text.to_i
@@ -2,7 +2,12 @@ module Saxlsx
2
2
  class SharedStringCollectionParser < Ox::Sax
3
3
 
4
4
  def self.parse(file_system, &block)
5
- SaxParser.parse self.new(&block), file_system.shared_strings
5
+ shared_strings = file_system.shared_strings
6
+ if shared_strings
7
+ SaxParser.parse self.new(&block), shared_strings
8
+ else
9
+ []
10
+ end
6
11
  end
7
12
 
8
13
  def initialize(&block)
@@ -3,15 +3,15 @@ module Saxlsx
3
3
 
4
4
  attr_reader :name
5
5
 
6
- def initialize(name, index, file_system, shared_strings)
6
+ def initialize(name, index, file_system, workbook)
7
7
  @name = name
8
8
  @index = index
9
9
  @file_system = file_system
10
- @shared_strings = shared_strings
10
+ @workbook = workbook
11
11
  end
12
12
 
13
13
  def rows
14
- @rows ||= RowsCollection.new(@index, @file_system, @shared_strings)
14
+ @rows ||= RowsCollection.new(@index, @file_system, @workbook)
15
15
  end
16
16
 
17
17
  def to_csv(path)
@@ -3,13 +3,13 @@ module Saxlsx
3
3
 
4
4
  include Enumerable
5
5
 
6
- def initialize(file_system, shared_strings)
6
+ def initialize(file_system, workbook)
7
7
  @file_system = file_system
8
- @shared_strings = shared_strings
8
+ @workbook = workbook
9
9
  end
10
10
 
11
11
  def each(&block)
12
- SheetCollectionParser.parse @file_system, @shared_strings, &block
12
+ SheetCollectionParser.parse @file_system, @workbook, &block
13
13
  end
14
14
 
15
15
  end
@@ -3,30 +3,55 @@ module Saxlsx
3
3
 
4
4
  CurrentSheet = Struct.new :index, :name
5
5
 
6
- def self.parse(file_system, shared_strings, &block)
7
- SaxParser.parse self.new(file_system, shared_strings, &block), file_system.workbook
6
+ def self.parse(file_system, workbook, &block)
7
+ SaxParser.parse(
8
+ self.new(file_system, workbook, &block),
9
+ file_system.workbook
10
+ )
8
11
  end
9
12
 
10
- def initialize(file_system, shared_strings, &block)
13
+ def initialize(file_system, workbook, &block)
11
14
  @file_system = file_system
12
- @shared_strings = shared_strings
15
+ @workbook = workbook
13
16
  @block = block
14
17
  @index = -1
18
+ @workbook_pr = false
15
19
  end
16
20
 
17
21
  def start_element(name)
18
- @current_sheet = CurrentSheet.new(@index += 1) if name == :sheet
22
+ case name
23
+ when :sheet
24
+ @current_sheet = CurrentSheet.new(@index += 1)
25
+ when :workbookPr
26
+ @workbook_pr = true
27
+ end
19
28
  end
20
29
 
21
30
  def end_element(name)
22
- if name == :sheet
23
- @block.call Sheet.new(@current_sheet.name, @current_sheet.index, @file_system, @shared_strings)
31
+ case name
32
+ when :sheet
33
+ @block.call Sheet.new(
34
+ @current_sheet.name,
35
+ @current_sheet.index,
36
+ @file_system,
37
+ @workbook
38
+ )
24
39
  @current_sheet = nil
40
+ when :workbookPr
41
+ @workbook_pr = false
25
42
  end
26
43
  end
27
44
 
28
45
  def attr(name, value)
29
- @current_sheet.name = value if @current_sheet && name == :name
46
+ if @current_sheet
47
+ if name == :name
48
+ @current_sheet.name = value
49
+ end
50
+ elsif @workbook_pr
51
+ if name == :date1904 && value =~ /true|1/i
52
+ @workbook.date1904 = true
53
+ end
54
+ end
30
55
  end
31
56
 
32
57
  end
@@ -1,3 +1,3 @@
1
1
  module Saxlsx
2
- VERSION = '0.3.0'
2
+ VERSION = '1.0.0'
3
3
  end
@@ -1,5 +1,9 @@
1
1
  module Saxlsx
2
2
  class Workbook
3
+ DATE_SYSTEM_1900 = DateTime.new(1899, 12, 30)
4
+ DATE_SYSTEM_1904 = DateTime.new(1904, 1, 1)
5
+
6
+ attr_accessor :date1904
3
7
 
4
8
  def self.open(filename)
5
9
  begin
@@ -35,6 +39,10 @@ module Saxlsx
35
39
  @number_formats ||= StyleCollection.new(@file_system).to_a
36
40
  end
37
41
 
42
+ def base_date
43
+ @base_date ||= date1904 ? DATE_SYSTEM_1904 : DATE_SYSTEM_1900
44
+ end
45
+
38
46
  def to_csv(path)
39
47
  sheets.each { |s| s.to_csv path }
40
48
  end
Binary file
@@ -90,4 +90,18 @@ describe Sheet do
90
90
  end
91
91
  end
92
92
 
93
+ context 'with 1904 date system' do
94
+ let(:filename) { "#{File.dirname(__FILE__)}/data/Spec1904.xlsx" }
95
+
96
+ it 'should use 1904 date system when converting dates' do
97
+ Workbook.open filename do |w|
98
+ w.sheets[0].tap do |s|
99
+ s.rows[0].should eq [
100
+ DateTime.new(1970, 1, 1, 1, 0, 0),
101
+ DateTime.new(1970, 1, 1)
102
+ ]
103
+ end
104
+ end
105
+ end
106
+ end
93
107
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: saxlsx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Edgars Beigarts
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-25 00:00:00.000000000 Z
11
+ date: 2015-03-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip
@@ -126,6 +126,7 @@ files:
126
126
  - saxlsx.gemspec
127
127
  - spec/column_name_generator_spec.rb
128
128
  - spec/data/Spec.xlsx
129
+ - spec/data/Spec1904.xlsx
129
130
  - spec/sheet_spec.rb
130
131
  - spec/spec_helper.rb
131
132
  - spec/workbook_spec.rb
@@ -156,6 +157,7 @@ summary: Fast xlsx reader on top of Ox SAX parser
156
157
  test_files:
157
158
  - spec/column_name_generator_spec.rb
158
159
  - spec/data/Spec.xlsx
160
+ - spec/data/Spec1904.xlsx
159
161
  - spec/sheet_spec.rb
160
162
  - spec/spec_helper.rb
161
163
  - spec/workbook_spec.rb