saxlsx 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f952d45e55462cfa02bcbf4c2dd5aeb4617f6971
4
- data.tar.gz: 824471f88aa1b5892f35471688cb1ba0c5d6554e
3
+ metadata.gz: 5b0cbd91196c90e22a5dc4ede2bf372792cc5e3f
4
+ data.tar.gz: 96bec50a0662ab5741c54e329c80c073c9c202e2
5
5
  SHA512:
6
- metadata.gz: eeb2a2adc96d6b43235ef07df32615459e23b9dab884eafaa4df9e816fda25e605788ceb9591abce7e5c27de62de7c13b224330d8dd68f3b6020d30b6b6ac1d1
7
- data.tar.gz: 827d651635d5f47eb613559730bfe1f747c4912621142f832437e4eb3c71892448e68427918ad5012fddcda0bb82340ff09ab55c2268ceef8286e02ea3e71f14
6
+ metadata.gz: e55094b6131ae28e50afb8bed91d91f15b200596f94cb6215f9bea6c5e9303d158f281ec6495d8cbb481528f3893ea614748d3fd3c9130b06555df6d98930e10
7
+ data.tar.gz: fbd741be1e3b91784bc11502351a4195d31d0459de3765bd7de6acc64cebf4b5580a9eb0fc19bab0ad844f27ab316c84085ff9b4155bf76d52e5cef2f2d05738
@@ -26,8 +26,13 @@ module Saxlsx
26
26
  @zip.read('xl/sharedStrings.xml')
27
27
  end
28
28
 
29
- def sheets
30
- @zip.glob('xl/worksheets/sheet*.xml').sort.map{ |f| @zip.read(f).match(/<sheetData>.*<\/sheetData>/).to_s }
29
+ def styles
30
+ @zip.read('xl/styles.xml')
31
+ end
32
+
33
+ def sheet(i)
34
+ f = @zip.glob('xl/worksheets/sheet*.xml').sort[i]
35
+ @zip.read(f)
31
36
  end
32
37
 
33
38
  end
@@ -7,14 +7,21 @@ module Saxlsx
7
7
  @index = index
8
8
  @file_system = file_system
9
9
  @shared_strings = shared_strings
10
+ @sheet = file_system.sheet(index)
10
11
  end
11
12
 
12
13
  def each(&block)
13
- RowsCollectionParser.parse @index, @file_system, @shared_strings, &block
14
+ RowsCollectionParser.parse @index, @sheet, @shared_strings, &block
14
15
  end
15
16
 
17
+ def count
18
+ @count ||= @sheet.match(/<dimension ref="[^:]+:[A-Z]*(\d+)"/)[1].to_i
19
+ end
20
+
21
+ alias :size :count
22
+
16
23
  def [](value)
17
24
  to_a[value]
18
25
  end
19
26
  end
20
- end
27
+ end
@@ -1,12 +1,49 @@
1
1
  module Saxlsx
2
2
  class RowsCollectionParser < Ox::Sax
3
+ NUM_FORMATS = {
4
+ 0 => :string, # General
5
+ 1 => :fixnum, # 0
6
+ 2 => :float, # 0.00
7
+ 3 => :fixnum, # #,##0
8
+ 4 => :float, # #,##0.00
9
+ 5 => :unsupported, # $#,##0_);($#,##0)
10
+ 6 => :unsupported, # $#,##0_);[Red]($#,##0)
11
+ 7 => :unsupported, # $#,##0.00_);($#,##0.00)
12
+ 8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
13
+ 9 => :percentage, # 0%
14
+ 10 => :percentage, # 0.00%
15
+ 11 => :bignum, # 0.00E+00
16
+ 12 => :unsupported, # # ?/?
17
+ 13 => :unsupported, # # ??/??
18
+ 14 => :date, # mm-dd-yy
19
+ 15 => :date, # d-mmm-yy
20
+ 16 => :date, # d-mmm
21
+ 17 => :date, # mmm-yy
22
+ 18 => :time, # h:mm AM/PM
23
+ 19 => :time, # h:mm:ss AM/PM
24
+ 20 => :time, # h:mm
25
+ 21 => :time, # h:mm:ss
26
+ 22 => :date_time, # m/d/yy h:mm
27
+ 37 => :unsupported, # #,##0 ;(#,##0)
28
+ 38 => :unsupported, # #,##0 ;[Red](#,##0)
29
+ 39 => :unsupported, # #,##0.00;(#,##0.00)
30
+ 40 => :unsupported, # #,##0.00;[Red](#,##0.00)
31
+ 45 => :time, # mm:ss
32
+ 46 => :time, # [h]:mm:ss
33
+ 47 => :time, # mmss.0
34
+ 48 => :bignum, # ##0.0E+0
35
+ 49 => :unsupported # @
36
+ }
3
37
 
4
- def self.parse(index, file_system, shared_strings, &block)
5
- SaxParser.parse self.new(shared_strings, &block), file_system.sheets[index]
38
+ DATE_SYSTEM_1900 = DateTime.new(1899, 12, 30)
39
+
40
+ def self.parse(index, data, workbook, &block)
41
+ SaxParser.parse self.new(workbook, &block), data
6
42
  end
7
43
 
8
- def initialize(shared_strings, &block)
9
- @shared_strings = shared_strings
44
+ def initialize(workbook, &block)
45
+ @shared_strings = workbook.shared_strings
46
+ @number_formats = workbook.number_formats
10
47
  @block = block
11
48
  end
12
49
 
@@ -18,7 +55,10 @@ module Saxlsx
18
55
  @next_column = 'A'
19
56
  end
20
57
 
21
- @current_type = nil if name == :c
58
+ if name == :c
59
+ @current_type = nil
60
+ @current_number_format = nil
61
+ end
22
62
  end
23
63
 
24
64
  def end_element(name)
@@ -30,8 +70,14 @@ module Saxlsx
30
70
 
31
71
  def attr(name, value)
32
72
  if @current_element == :c
33
- @current_type = value if name == :t
34
- @current_column = value.gsub(/\d/, '') if name == :r
73
+ case name
74
+ when :t
75
+ @current_type = value
76
+ when :r
77
+ @current_column = value.gsub(/\d/, '')
78
+ when :s
79
+ @current_number_format = detect_format_type(value.to_i)
80
+ end
35
81
  end
36
82
  end
37
83
 
@@ -50,17 +96,58 @@ module Saxlsx
50
96
 
51
97
  def value_of(text)
52
98
  case @current_type
53
- when 's'
54
- @shared_strings[text.to_i]
55
- when 'b'
56
- BooleanParser.parse text
57
- when 'n'
99
+ when 's'
100
+ @shared_strings[text.to_i]
101
+ when 'b'
102
+ BooleanParser.parse text
103
+ else
104
+ case @current_number_format
105
+ when :date
106
+ DATE_SYSTEM_1900 + text.to_i
107
+ when :date_time
108
+ # Round time to seconds
109
+ date = DATE_SYSTEM_1900 + (text.to_f * 86400).round.fdiv(86400)
110
+ DateTime.new(date.year, date.month, date.day, date.hour, date.minute, date.second)
111
+ when :fixnum
112
+ text.to_i
113
+ when :float
58
114
  text.to_f
115
+ when :bignum
116
+ BigDecimal.new(text)
117
+ when :percentage
118
+ text.to_f / 100
59
119
  else
60
- CGI.unescapeHTML(text)
120
+ if @current_type == 'n'
121
+ text.to_f
122
+ elsif text =~ /\A-?\d+(\.\d+(?:e[+-]\d+)?)?\Z/i # Auto convert numbers
123
+ $1 ? text.to_f : text.to_i
124
+ else
125
+ CGI.unescapeHTML(text)
126
+ end
127
+ end
61
128
  end
129
+ end
62
130
 
131
+ def detect_format_type(index)
132
+ format = @number_formats[index]
133
+ NUM_FORMATS[format] || detect_custom_format_type(format)
63
134
  end
64
135
 
136
+ # This is the least deterministic part of reading xlsx files. Due to
137
+ # custom styles, you can't know for sure when a date is a date other than
138
+ # looking at its format and guessing. It's not impossible to guess right,
139
+ # though.
140
+ #
141
+ # http://stackoverflow.com/questions/4948998/determining-if-an-xlsx-cell-is-date-formatted-for-excel-2007-spreadsheets
142
+ def detect_custom_format_type(code)
143
+ code = code.gsub(/\[[^\]]+\]/, '') # Strip meta - [...]
144
+ if code =~ /0/
145
+ :float
146
+ elsif code =~ /[ymdhis]/i
147
+ :date_time
148
+ else
149
+ :unsupported
150
+ end
151
+ end
65
152
  end
66
153
  end
@@ -0,0 +1,15 @@
1
+ module Saxlsx
2
+ class StyleCollection
3
+
4
+ include Enumerable
5
+
6
+ def initialize(file_system)
7
+ @file_system = file_system
8
+ end
9
+
10
+ def each(&block)
11
+ StyleCollectionParser.parse @file_system, &block
12
+ end
13
+
14
+ end
15
+ end
@@ -0,0 +1,52 @@
1
+ module Saxlsx
2
+ class StyleCollectionParser < Ox::Sax
3
+ def self.parse(file_system, &block)
4
+ SaxParser.parse self.new(&block), file_system.styles
5
+ end
6
+
7
+ def initialize(&block)
8
+ @block = block
9
+ @cell_styles = false
10
+ @custom_num_fmts = {}
11
+ end
12
+
13
+ def start_element(name)
14
+ case name
15
+ when :cellXfs
16
+ @cell_styles = true
17
+ when :xf
18
+ @num_fmt_id = nil
19
+ when :numFmt
20
+ @num_fmt_id = nil
21
+ @num_fmt_code = nil
22
+ end
23
+ end
24
+
25
+ def end_element(name)
26
+ case name
27
+ when :cellXfs
28
+ @cell_styles = false
29
+ when :xf
30
+ if @cell_styles
31
+ custom_num_fmt_code = @custom_num_fmts[@num_fmt_id]
32
+ if custom_num_fmt_code
33
+ @block.call custom_num_fmt_code
34
+ else
35
+ @block.call @num_fmt_id
36
+ end
37
+ end
38
+ when :numFmt
39
+ @custom_num_fmts[@num_fmt_id] = @num_fmt_code
40
+ end
41
+ end
42
+
43
+ def attr(name, value)
44
+ case name
45
+ when :numFmtId
46
+ @num_fmt_id = value.to_i
47
+ when :formatCode
48
+ @num_fmt_code = value
49
+ end
50
+ end
51
+ end
52
+ end
@@ -1,3 +1,3 @@
1
1
  module Saxlsx
2
- VERSION = '0.2.0'
2
+ VERSION = '0.3.0'
3
3
  end
@@ -19,7 +19,7 @@ module Saxlsx
19
19
  end
20
20
 
21
21
  def sheets(name=nil)
22
- @sheets ||= SheetCollection.new(@file_system, shared_strings).to_a
22
+ @sheets ||= SheetCollection.new(@file_system, self).to_a
23
23
  name.nil? ? @sheets : @sheets.detect { |s| s.name == name }
24
24
  end
25
25
 
@@ -31,6 +31,10 @@ module Saxlsx
31
31
  @shared_strings ||= SharedStringCollection.new(@file_system).to_a
32
32
  end
33
33
 
34
+ def number_formats
35
+ @number_formats ||= StyleCollection.new(@file_system).to_a
36
+ end
37
+
34
38
  def to_csv(path)
35
39
  sheets.each { |s| s.to_csv path }
36
40
  end
Binary file
@@ -12,11 +12,11 @@ describe Sheet do
12
12
 
13
13
  it 'Rows count' do
14
14
  Workbook.open filename do |w|
15
- w.sheets[0].should have(7).rows
16
- w.sheets[1].should have(9).rows
17
- w.sheets[2].should have(3).rows
18
- w.sheets[3].should have(2).rows
19
- w.sheets[4].should have(3).rows
15
+ w.sheets[0].rows.count.should eq 7
16
+ w.sheets[1].rows.count.should eq 9
17
+ w.sheets[2].rows.count.should eq 3
18
+ w.sheets[3].rows.count.should eq 2
19
+ w.sheets[4].rows.count.should eq 3
20
20
  end
21
21
  end
22
22
 
@@ -29,8 +29,20 @@ describe Sheet do
29
29
  it 'Rows content' do
30
30
  Workbook.open filename do |w|
31
31
  w.sheets[0].tap do |s|
32
- s.rows[0].should eq ['LevenshteinDistance', 0]
33
- s.rows[1].should eq ['Case sensitive', false]
32
+ s.rows[0].should eq [
33
+ 'LevenshteinDistance',
34
+ 3.14,
35
+ 3,
36
+ DateTime.new(1970, 1, 1, 1, 0, 0),
37
+ DateTime.new(1970, 1, 1),
38
+ BigDecimal.new('3.4028236692093801E+38')
39
+ ]
40
+ s.rows[1].should eq [
41
+ 'Case sensitive',
42
+ false,
43
+ 3.0,
44
+ DateTime.new(1970, 1, 1, 1, 0, 0)
45
+ ]
34
46
  s.rows[2].should eq ['Fields', 'Type', 'URL Mining']
35
47
  s.rows[3].should eq ['autor', 'text', false]
36
48
  s.rows[4].should eq ['texto', 'text', false]
@@ -68,8 +80,8 @@ describe Sheet do
68
80
  w.sheets[0].to_csv tmp_path
69
81
 
70
82
  csv = File.open(csv_file, 'r') { |f| f.readlines }
71
- csv[0].should eq "\"LevenshteinDistance\",\"0.0\"\n"
72
- csv[1].should eq "\"Case sensitive\",\"false\"\n"
83
+ csv[0].should eq %{"LevenshteinDistance","3.14","3","1970-01-01T01:00:00+00:00","1970-01-01T00:00:00+00:00","0.34028236692093801E39"\n}
84
+ csv[1].should eq %{"Case sensitive","false","3.0","1970-01-01T01:00:00+00:00"\n}
73
85
  csv[2].should eq "\"Fields\",\"Type\",\"URL Mining\"\n"
74
86
  csv[3].should eq "\"autor\",\"text\",\"false\"\n"
75
87
  csv[4].should eq "\"texto\",\"text\",\"false\"\n"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: saxlsx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Edgars Beigarts
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-11 00:00:00.000000000 Z
11
+ date: 2015-03-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip
@@ -119,6 +119,8 @@ files:
119
119
  - lib/saxlsx/sheet.rb
120
120
  - lib/saxlsx/sheet_collection.rb
121
121
  - lib/saxlsx/sheet_collection_parser.rb
122
+ - lib/saxlsx/style_collection.rb
123
+ - lib/saxlsx/style_collection_parser.rb
122
124
  - lib/saxlsx/version.rb
123
125
  - lib/saxlsx/workbook.rb
124
126
  - saxlsx.gemspec