saxlsx 0.3.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5b0cbd91196c90e22a5dc4ede2bf372792cc5e3f
4
- data.tar.gz: 96bec50a0662ab5741c54e329c80c073c9c202e2
3
+ metadata.gz: 48288886c8097438a2f3527b42c31f0db2a63c34
4
+ data.tar.gz: 3386ac7f891cc701d09697e5620d4d5aafee216b
5
5
  SHA512:
6
- metadata.gz: e55094b6131ae28e50afb8bed91d91f15b200596f94cb6215f9bea6c5e9303d158f281ec6495d8cbb481528f3893ea614748d3fd3c9130b06555df6d98930e10
7
- data.tar.gz: fbd741be1e3b91784bc11502351a4195d31d0459de3765bd7de6acc64cebf4b5580a9eb0fc19bab0ad844f27ab316c84085ff9b4155bf76d52e5cef2f2d05738
6
+ metadata.gz: e8ca7c015a5b34310b2008f70ea9eec36abdcae99dd4fd29ba70711de286f8dfb37a1cbf4ff7e295b98bd1a2d2cdb700f9b83f9cd790df16d9818a943792c005
7
+ data.tar.gz: 7dea47237742b543733e9aab3ffdd2c8d11cb9a8a7826e63b244dd20b782dc5661c16bccbef23485cec81fb9855dd1069799d351f67555f35dcb0509a8f30763
@@ -1,5 +1,5 @@
1
1
  rvm:
2
- - 2.0.0
3
- - 2.1.0
4
- - rbx
2
+ - 2.0
3
+ - 2.1
4
+ - 2.2
5
5
  - ruby-head
data/Gemfile CHANGED
@@ -2,3 +2,8 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in saxlsx.gemspec
4
4
  gemspec
5
+
6
+ gem 'axlsx', '~> 2.1.0.pre'
7
+ gem 'rubyXL'
8
+ gem 'simple_xlsx_reader'
9
+ gem 'creek'
data/README.md CHANGED
@@ -2,7 +2,12 @@
2
2
 
3
3
  [![Build Status](https://travis-ci.org/mak-it/saxlsx.png?branch=master)](https://travis-ci.org/mak-it/saxlsx)
4
4
 
5
- Fast XLSX reader on top of Ox SAX parser.
5
+ **Fast** and memory efficient XLSX reader on top of Ox SAX parser.
6
+
7
+ It reads row by row and doesn't store the whole sheet in memory, so this
8
+ approach is more suitable when parsing big files. This also means that functions
9
+ and references will not work, as this style of parsing doesn't know
10
+ anything about other rows.
6
11
 
7
12
  ## Installation
8
13
 
@@ -36,6 +41,19 @@ Saxlsx::Workbook.open filename do |w|
36
41
  end
37
42
  ```
38
43
 
44
+ ## How fast is it?
45
+
46
+ ```bash
47
+ $ rake bench
48
+ ```
49
+
50
+ ```
51
+ creek 2.610000 0.060000 2.670000 ( 2.704594)
52
+ rubyXL 3.830000 0.130000 3.960000 ( 3.985651)
53
+ saxlsx 0.750000 0.010000 0.760000 ( 0.785445)
54
+ simple_xlsx_reader 1.870000 0.040000 1.910000 ( 1.940999)
55
+ ```
56
+
39
57
  ## Contributing
40
58
 
41
59
  1. Fork it
data/Rakefile CHANGED
@@ -4,3 +4,71 @@ require "rspec/core/rake_task"
4
4
  Bundler::GemHelper.install_tasks
5
5
  RSpec::Core::RakeTask.new(:spec)
6
6
  task :default => :spec
7
+
8
+
9
+ task :bench do
10
+ require 'benchmark'
11
+ require 'axlsx'
12
+ require 'saxlsx'
13
+ require 'rubyXL'
14
+ require 'simple_xlsx_reader'
15
+ require 'creek'
16
+
17
+ path = "tmp/bench.xlsx"
18
+ unless File.exists?(path)
19
+ puts "* Generating #{path}"
20
+ FileUtils.mkdir_p File.dirname(path)
21
+ Axlsx::Package.new do |p|
22
+ money_style = p.workbook.styles.add_style(
23
+ num_fmt: 5, format_code: "€0.000"
24
+ )
25
+ p.workbook.add_worksheet(:name => "Pie Chart") do |sheet|
26
+ 10000.times do
27
+ sheet.add_row(
28
+ [Date.today, Time.now, 1000, 3.14, "Long" * 100],
29
+ types: [:date, :time, :integer, :float, :string],
30
+ style: [nil, nil, nil, money_style, nil]
31
+ )
32
+ end
33
+ end
34
+ p.use_shared_strings = true
35
+ p.serialize(path)
36
+ end
37
+ end
38
+
39
+ Benchmark.benchmark('', 20) do |x|
40
+ x.report "creek" do
41
+ w = Creek::Book.new path
42
+ w.sheets.each do |s|
43
+ s.rows.each do |r|
44
+ r.values.inspect
45
+ end
46
+ end
47
+ end
48
+ x.report "rubyXL" do
49
+ w = RubyXL::Parser.parse path
50
+ w.worksheets.each do |s|
51
+ s.each do |r|
52
+ r.cells.map(&:value).inspect
53
+ end
54
+ end
55
+ end
56
+ x.report "saxlsx" do
57
+ Saxlsx::Workbook.open path do |w|
58
+ w.sheets.each do |s|
59
+ s.rows.each do |r|
60
+ r.to_a.inspect
61
+ end
62
+ end
63
+ end
64
+ end
65
+ x.report "simple_xlsx_reader" do
66
+ w = SimpleXlsxReader.open path
67
+ w.sheets.each do |s|
68
+ s.rows.each do |r|
69
+ r.to_a.inspect
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end
@@ -19,11 +19,12 @@ module Saxlsx
19
19
  end
20
20
 
21
21
  def workbook
22
- @zip.read('xl/workbook.xml').match(/<sheets>.*<\/sheets>/).to_s
22
+ @zip.read('xl/workbook.xml')
23
23
  end
24
24
 
25
25
  def shared_strings
26
- @zip.read('xl/sharedStrings.xml')
26
+ file = @zip.glob('xl/shared[Ss]trings.xml').first
27
+ @zip.read(file) if file
27
28
  end
28
29
 
29
30
  def styles
@@ -3,15 +3,15 @@ module Saxlsx
3
3
 
4
4
  include Enumerable
5
5
 
6
- def initialize(index, file_system, shared_strings)
6
+ def initialize(index, file_system, workbook)
7
7
  @index = index
8
8
  @file_system = file_system
9
- @shared_strings = shared_strings
9
+ @workbook = workbook
10
10
  @sheet = file_system.sheet(index)
11
11
  end
12
12
 
13
13
  def each(&block)
14
- RowsCollectionParser.parse @index, @sheet, @shared_strings, &block
14
+ RowsCollectionParser.parse @index, @sheet, @workbook, &block
15
15
  end
16
16
 
17
17
  def count
@@ -35,13 +35,12 @@ module Saxlsx
35
35
  49 => :unsupported # @
36
36
  }
37
37
 
38
- DATE_SYSTEM_1900 = DateTime.new(1899, 12, 30)
39
-
40
38
  def self.parse(index, data, workbook, &block)
41
39
  SaxParser.parse self.new(workbook, &block), data
42
40
  end
43
41
 
44
42
  def initialize(workbook, &block)
43
+ @base_date = workbook.base_date
45
44
  @shared_strings = workbook.shared_strings
46
45
  @number_formats = workbook.number_formats
47
46
  @block = block
@@ -97,16 +96,16 @@ module Saxlsx
97
96
  def value_of(text)
98
97
  case @current_type
99
98
  when 's'
100
- @shared_strings[text.to_i]
99
+ @shared_strings[text.to_i] || text
101
100
  when 'b'
102
101
  BooleanParser.parse text
103
102
  else
104
103
  case @current_number_format
105
104
  when :date
106
- DATE_SYSTEM_1900 + text.to_i
105
+ @base_date + text.to_i
107
106
  when :date_time
108
107
  # Round time to seconds
109
- date = DATE_SYSTEM_1900 + (text.to_f * 86400).round.fdiv(86400)
108
+ date = @base_date + (text.to_f * 86400).round.fdiv(86400)
110
109
  DateTime.new(date.year, date.month, date.day, date.hour, date.minute, date.second)
111
110
  when :fixnum
112
111
  text.to_i
@@ -2,7 +2,12 @@ module Saxlsx
2
2
  class SharedStringCollectionParser < Ox::Sax
3
3
 
4
4
  def self.parse(file_system, &block)
5
- SaxParser.parse self.new(&block), file_system.shared_strings
5
+ shared_strings = file_system.shared_strings
6
+ if shared_strings
7
+ SaxParser.parse self.new(&block), shared_strings
8
+ else
9
+ []
10
+ end
6
11
  end
7
12
 
8
13
  def initialize(&block)
@@ -3,15 +3,15 @@ module Saxlsx
3
3
 
4
4
  attr_reader :name
5
5
 
6
- def initialize(name, index, file_system, shared_strings)
6
+ def initialize(name, index, file_system, workbook)
7
7
  @name = name
8
8
  @index = index
9
9
  @file_system = file_system
10
- @shared_strings = shared_strings
10
+ @workbook = workbook
11
11
  end
12
12
 
13
13
  def rows
14
- @rows ||= RowsCollection.new(@index, @file_system, @shared_strings)
14
+ @rows ||= RowsCollection.new(@index, @file_system, @workbook)
15
15
  end
16
16
 
17
17
  def to_csv(path)
@@ -3,13 +3,13 @@ module Saxlsx
3
3
 
4
4
  include Enumerable
5
5
 
6
- def initialize(file_system, shared_strings)
6
+ def initialize(file_system, workbook)
7
7
  @file_system = file_system
8
- @shared_strings = shared_strings
8
+ @workbook = workbook
9
9
  end
10
10
 
11
11
  def each(&block)
12
- SheetCollectionParser.parse @file_system, @shared_strings, &block
12
+ SheetCollectionParser.parse @file_system, @workbook, &block
13
13
  end
14
14
 
15
15
  end
@@ -3,30 +3,55 @@ module Saxlsx
3
3
 
4
4
  CurrentSheet = Struct.new :index, :name
5
5
 
6
- def self.parse(file_system, shared_strings, &block)
7
- SaxParser.parse self.new(file_system, shared_strings, &block), file_system.workbook
6
+ def self.parse(file_system, workbook, &block)
7
+ SaxParser.parse(
8
+ self.new(file_system, workbook, &block),
9
+ file_system.workbook
10
+ )
8
11
  end
9
12
 
10
- def initialize(file_system, shared_strings, &block)
13
+ def initialize(file_system, workbook, &block)
11
14
  @file_system = file_system
12
- @shared_strings = shared_strings
15
+ @workbook = workbook
13
16
  @block = block
14
17
  @index = -1
18
+ @workbook_pr = false
15
19
  end
16
20
 
17
21
  def start_element(name)
18
- @current_sheet = CurrentSheet.new(@index += 1) if name == :sheet
22
+ case name
23
+ when :sheet
24
+ @current_sheet = CurrentSheet.new(@index += 1)
25
+ when :workbookPr
26
+ @workbook_pr = true
27
+ end
19
28
  end
20
29
 
21
30
  def end_element(name)
22
- if name == :sheet
23
- @block.call Sheet.new(@current_sheet.name, @current_sheet.index, @file_system, @shared_strings)
31
+ case name
32
+ when :sheet
33
+ @block.call Sheet.new(
34
+ @current_sheet.name,
35
+ @current_sheet.index,
36
+ @file_system,
37
+ @workbook
38
+ )
24
39
  @current_sheet = nil
40
+ when :workbookPr
41
+ @workbook_pr = false
25
42
  end
26
43
  end
27
44
 
28
45
  def attr(name, value)
29
- @current_sheet.name = value if @current_sheet && name == :name
46
+ if @current_sheet
47
+ if name == :name
48
+ @current_sheet.name = value
49
+ end
50
+ elsif @workbook_pr
51
+ if name == :date1904 && value =~ /true|1/i
52
+ @workbook.date1904 = true
53
+ end
54
+ end
30
55
  end
31
56
 
32
57
  end
@@ -1,3 +1,3 @@
1
1
  module Saxlsx
2
- VERSION = '0.3.0'
2
+ VERSION = '1.0.0'
3
3
  end
@@ -1,5 +1,9 @@
1
1
  module Saxlsx
2
2
  class Workbook
3
+ DATE_SYSTEM_1900 = DateTime.new(1899, 12, 30)
4
+ DATE_SYSTEM_1904 = DateTime.new(1904, 1, 1)
5
+
6
+ attr_accessor :date1904
3
7
 
4
8
  def self.open(filename)
5
9
  begin
@@ -35,6 +39,10 @@ module Saxlsx
35
39
  @number_formats ||= StyleCollection.new(@file_system).to_a
36
40
  end
37
41
 
42
+ def base_date
43
+ @base_date ||= date1904 ? DATE_SYSTEM_1904 : DATE_SYSTEM_1900
44
+ end
45
+
38
46
  def to_csv(path)
39
47
  sheets.each { |s| s.to_csv path }
40
48
  end
Binary file
@@ -90,4 +90,18 @@ describe Sheet do
90
90
  end
91
91
  end
92
92
 
93
+ context 'with 1904 date system' do
94
+ let(:filename) { "#{File.dirname(__FILE__)}/data/Spec1904.xlsx" }
95
+
96
+ it 'should use 1904 date system when converting dates' do
97
+ Workbook.open filename do |w|
98
+ w.sheets[0].tap do |s|
99
+ s.rows[0].should eq [
100
+ DateTime.new(1970, 1, 1, 1, 0, 0),
101
+ DateTime.new(1970, 1, 1)
102
+ ]
103
+ end
104
+ end
105
+ end
106
+ end
93
107
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: saxlsx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Edgars Beigarts
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-25 00:00:00.000000000 Z
11
+ date: 2015-03-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip
@@ -126,6 +126,7 @@ files:
126
126
  - saxlsx.gemspec
127
127
  - spec/column_name_generator_spec.rb
128
128
  - spec/data/Spec.xlsx
129
+ - spec/data/Spec1904.xlsx
129
130
  - spec/sheet_spec.rb
130
131
  - spec/spec_helper.rb
131
132
  - spec/workbook_spec.rb
@@ -156,6 +157,7 @@ summary: Fast xlsx reader on top of Ox SAX parser
156
157
  test_files:
157
158
  - spec/column_name_generator_spec.rb
158
159
  - spec/data/Spec.xlsx
160
+ - spec/data/Spec1904.xlsx
159
161
  - spec/sheet_spec.rb
160
162
  - spec/spec_helper.rb
161
163
  - spec/workbook_spec.rb