saxlsx 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f952d45e55462cfa02bcbf4c2dd5aeb4617f6971
4
- data.tar.gz: 824471f88aa1b5892f35471688cb1ba0c5d6554e
3
+ metadata.gz: 5b0cbd91196c90e22a5dc4ede2bf372792cc5e3f
4
+ data.tar.gz: 96bec50a0662ab5741c54e329c80c073c9c202e2
5
5
  SHA512:
6
- metadata.gz: eeb2a2adc96d6b43235ef07df32615459e23b9dab884eafaa4df9e816fda25e605788ceb9591abce7e5c27de62de7c13b224330d8dd68f3b6020d30b6b6ac1d1
7
- data.tar.gz: 827d651635d5f47eb613559730bfe1f747c4912621142f832437e4eb3c71892448e68427918ad5012fddcda0bb82340ff09ab55c2268ceef8286e02ea3e71f14
6
+ metadata.gz: e55094b6131ae28e50afb8bed91d91f15b200596f94cb6215f9bea6c5e9303d158f281ec6495d8cbb481528f3893ea614748d3fd3c9130b06555df6d98930e10
7
+ data.tar.gz: fbd741be1e3b91784bc11502351a4195d31d0459de3765bd7de6acc64cebf4b5580a9eb0fc19bab0ad844f27ab316c84085ff9b4155bf76d52e5cef2f2d05738
@@ -26,8 +26,13 @@ module Saxlsx
26
26
  @zip.read('xl/sharedStrings.xml')
27
27
  end
28
28
 
29
- def sheets
30
- @zip.glob('xl/worksheets/sheet*.xml').sort.map{ |f| @zip.read(f).match(/<sheetData>.*<\/sheetData>/).to_s }
29
+ def styles
30
+ @zip.read('xl/styles.xml')
31
+ end
32
+
33
+ def sheet(i)
34
+ f = @zip.glob('xl/worksheets/sheet*.xml').sort[i]
35
+ @zip.read(f)
31
36
  end
32
37
 
33
38
  end
@@ -7,14 +7,21 @@ module Saxlsx
7
7
  @index = index
8
8
  @file_system = file_system
9
9
  @shared_strings = shared_strings
10
+ @sheet = file_system.sheet(index)
10
11
  end
11
12
 
12
13
  def each(&block)
13
- RowsCollectionParser.parse @index, @file_system, @shared_strings, &block
14
+ RowsCollectionParser.parse @index, @sheet, @shared_strings, &block
14
15
  end
15
16
 
17
+ def count
18
+ @count ||= @sheet.match(/<dimension ref="[^:]+:[A-Z]*(\d+)"/)[1].to_i
19
+ end
20
+
21
+ alias :size :count
22
+
16
23
  def [](value)
17
24
  to_a[value]
18
25
  end
19
26
  end
20
- end
27
+ end
@@ -1,12 +1,49 @@
1
1
  module Saxlsx
2
2
  class RowsCollectionParser < Ox::Sax
3
+ NUM_FORMATS = {
4
+ 0 => :string, # General
5
+ 1 => :fixnum, # 0
6
+ 2 => :float, # 0.00
7
+ 3 => :fixnum, # #,##0
8
+ 4 => :float, # #,##0.00
9
+ 5 => :unsupported, # $#,##0_);($#,##0)
10
+ 6 => :unsupported, # $#,##0_);[Red]($#,##0)
11
+ 7 => :unsupported, # $#,##0.00_);($#,##0.00)
12
+ 8 => :unsupported, # $#,##0.00_);[Red]($#,##0.00)
13
+ 9 => :percentage, # 0%
14
+ 10 => :percentage, # 0.00%
15
+ 11 => :bignum, # 0.00E+00
16
+ 12 => :unsupported, # # ?/?
17
+ 13 => :unsupported, # # ??/??
18
+ 14 => :date, # mm-dd-yy
19
+ 15 => :date, # d-mmm-yy
20
+ 16 => :date, # d-mmm
21
+ 17 => :date, # mmm-yy
22
+ 18 => :time, # h:mm AM/PM
23
+ 19 => :time, # h:mm:ss AM/PM
24
+ 20 => :time, # h:mm
25
+ 21 => :time, # h:mm:ss
26
+ 22 => :date_time, # m/d/yy h:mm
27
+ 37 => :unsupported, # #,##0 ;(#,##0)
28
+ 38 => :unsupported, # #,##0 ;[Red](#,##0)
29
+ 39 => :unsupported, # #,##0.00;(#,##0.00)
30
+ 40 => :unsupported, # #,##0.00;[Red](#,##0.00)
31
+ 45 => :time, # mm:ss
32
+ 46 => :time, # [h]:mm:ss
33
+ 47 => :time, # mmss.0
34
+ 48 => :bignum, # ##0.0E+0
35
+ 49 => :unsupported # @
36
+ }
3
37
 
4
- def self.parse(index, file_system, shared_strings, &block)
5
- SaxParser.parse self.new(shared_strings, &block), file_system.sheets[index]
38
+ DATE_SYSTEM_1900 = DateTime.new(1899, 12, 30)
39
+
40
+ def self.parse(index, data, workbook, &block)
41
+ SaxParser.parse self.new(workbook, &block), data
6
42
  end
7
43
 
8
- def initialize(shared_strings, &block)
9
- @shared_strings = shared_strings
44
+ def initialize(workbook, &block)
45
+ @shared_strings = workbook.shared_strings
46
+ @number_formats = workbook.number_formats
10
47
  @block = block
11
48
  end
12
49
 
@@ -18,7 +55,10 @@ module Saxlsx
18
55
  @next_column = 'A'
19
56
  end
20
57
 
21
- @current_type = nil if name == :c
58
+ if name == :c
59
+ @current_type = nil
60
+ @current_number_format = nil
61
+ end
22
62
  end
23
63
 
24
64
  def end_element(name)
@@ -30,8 +70,14 @@ module Saxlsx
30
70
 
31
71
  def attr(name, value)
32
72
  if @current_element == :c
33
- @current_type = value if name == :t
34
- @current_column = value.gsub(/\d/, '') if name == :r
73
+ case name
74
+ when :t
75
+ @current_type = value
76
+ when :r
77
+ @current_column = value.gsub(/\d/, '')
78
+ when :s
79
+ @current_number_format = detect_format_type(value.to_i)
80
+ end
35
81
  end
36
82
  end
37
83
 
@@ -50,17 +96,58 @@ module Saxlsx
50
96
 
51
97
  def value_of(text)
52
98
  case @current_type
53
- when 's'
54
- @shared_strings[text.to_i]
55
- when 'b'
56
- BooleanParser.parse text
57
- when 'n'
99
+ when 's'
100
+ @shared_strings[text.to_i]
101
+ when 'b'
102
+ BooleanParser.parse text
103
+ else
104
+ case @current_number_format
105
+ when :date
106
+ DATE_SYSTEM_1900 + text.to_i
107
+ when :date_time
108
+ # Round time to seconds
109
+ date = DATE_SYSTEM_1900 + (text.to_f * 86400).round.fdiv(86400)
110
+ DateTime.new(date.year, date.month, date.day, date.hour, date.minute, date.second)
111
+ when :fixnum
112
+ text.to_i
113
+ when :float
58
114
  text.to_f
115
+ when :bignum
116
+ BigDecimal.new(text)
117
+ when :percentage
118
+ text.to_f / 100
59
119
  else
60
- CGI.unescapeHTML(text)
120
+ if @current_type == 'n'
121
+ text.to_f
122
+ elsif text =~ /\A-?\d+(\.\d+(?:e[+-]\d+)?)?\Z/i # Auto convert numbers
123
+ $1 ? text.to_f : text.to_i
124
+ else
125
+ CGI.unescapeHTML(text)
126
+ end
127
+ end
61
128
  end
129
+ end
62
130
 
131
+ def detect_format_type(index)
132
+ format = @number_formats[index]
133
+ NUM_FORMATS[format] || detect_custom_format_type(format)
63
134
  end
64
135
 
136
+ # This is the least deterministic part of reading xlsx files. Due to
137
+ # custom styles, you can't know for sure when a date is a date other than
138
+ # looking at its format and guessing. It's not impossible to guess right,
139
+ # though.
140
+ #
141
+ # http://stackoverflow.com/questions/4948998/determining-if-an-xlsx-cell-is-date-formatted-for-excel-2007-spreadsheets
142
+ def detect_custom_format_type(code)
143
+ code = code.gsub(/\[[^\]]+\]/, '') # Strip meta - [...]
144
+ if code =~ /0/
145
+ :float
146
+ elsif code =~ /[ymdhis]/i
147
+ :date_time
148
+ else
149
+ :unsupported
150
+ end
151
+ end
65
152
  end
66
153
  end
@@ -0,0 +1,15 @@
1
+ module Saxlsx
2
+ class StyleCollection
3
+
4
+ include Enumerable
5
+
6
+ def initialize(file_system)
7
+ @file_system = file_system
8
+ end
9
+
10
+ def each(&block)
11
+ StyleCollectionParser.parse @file_system, &block
12
+ end
13
+
14
+ end
15
+ end
@@ -0,0 +1,52 @@
1
+ module Saxlsx
2
+ class StyleCollectionParser < Ox::Sax
3
+ def self.parse(file_system, &block)
4
+ SaxParser.parse self.new(&block), file_system.styles
5
+ end
6
+
7
+ def initialize(&block)
8
+ @block = block
9
+ @cell_styles = false
10
+ @custom_num_fmts = {}
11
+ end
12
+
13
+ def start_element(name)
14
+ case name
15
+ when :cellXfs
16
+ @cell_styles = true
17
+ when :xf
18
+ @num_fmt_id = nil
19
+ when :numFmt
20
+ @num_fmt_id = nil
21
+ @num_fmt_code = nil
22
+ end
23
+ end
24
+
25
+ def end_element(name)
26
+ case name
27
+ when :cellXfs
28
+ @cell_styles = false
29
+ when :xf
30
+ if @cell_styles
31
+ custom_num_fmt_code = @custom_num_fmts[@num_fmt_id]
32
+ if custom_num_fmt_code
33
+ @block.call custom_num_fmt_code
34
+ else
35
+ @block.call @num_fmt_id
36
+ end
37
+ end
38
+ when :numFmt
39
+ @custom_num_fmts[@num_fmt_id] = @num_fmt_code
40
+ end
41
+ end
42
+
43
+ def attr(name, value)
44
+ case name
45
+ when :numFmtId
46
+ @num_fmt_id = value.to_i
47
+ when :formatCode
48
+ @num_fmt_code = value
49
+ end
50
+ end
51
+ end
52
+ end
@@ -1,3 +1,3 @@
1
1
  module Saxlsx
2
- VERSION = '0.2.0'
2
+ VERSION = '0.3.0'
3
3
  end
@@ -19,7 +19,7 @@ module Saxlsx
19
19
  end
20
20
 
21
21
  def sheets(name=nil)
22
- @sheets ||= SheetCollection.new(@file_system, shared_strings).to_a
22
+ @sheets ||= SheetCollection.new(@file_system, self).to_a
23
23
  name.nil? ? @sheets : @sheets.detect { |s| s.name == name }
24
24
  end
25
25
 
@@ -31,6 +31,10 @@ module Saxlsx
31
31
  @shared_strings ||= SharedStringCollection.new(@file_system).to_a
32
32
  end
33
33
 
34
+ def number_formats
35
+ @number_formats ||= StyleCollection.new(@file_system).to_a
36
+ end
37
+
34
38
  def to_csv(path)
35
39
  sheets.each { |s| s.to_csv path }
36
40
  end
Binary file
@@ -12,11 +12,11 @@ describe Sheet do
12
12
 
13
13
  it 'Rows count' do
14
14
  Workbook.open filename do |w|
15
- w.sheets[0].should have(7).rows
16
- w.sheets[1].should have(9).rows
17
- w.sheets[2].should have(3).rows
18
- w.sheets[3].should have(2).rows
19
- w.sheets[4].should have(3).rows
15
+ w.sheets[0].rows.count.should eq 7
16
+ w.sheets[1].rows.count.should eq 9
17
+ w.sheets[2].rows.count.should eq 3
18
+ w.sheets[3].rows.count.should eq 2
19
+ w.sheets[4].rows.count.should eq 3
20
20
  end
21
21
  end
22
22
 
@@ -29,8 +29,20 @@ describe Sheet do
29
29
  it 'Rows content' do
30
30
  Workbook.open filename do |w|
31
31
  w.sheets[0].tap do |s|
32
- s.rows[0].should eq ['LevenshteinDistance', 0]
33
- s.rows[1].should eq ['Case sensitive', false]
32
+ s.rows[0].should eq [
33
+ 'LevenshteinDistance',
34
+ 3.14,
35
+ 3,
36
+ DateTime.new(1970, 1, 1, 1, 0, 0),
37
+ DateTime.new(1970, 1, 1),
38
+ BigDecimal.new('3.4028236692093801E+38')
39
+ ]
40
+ s.rows[1].should eq [
41
+ 'Case sensitive',
42
+ false,
43
+ 3.0,
44
+ DateTime.new(1970, 1, 1, 1, 0, 0)
45
+ ]
34
46
  s.rows[2].should eq ['Fields', 'Type', 'URL Mining']
35
47
  s.rows[3].should eq ['autor', 'text', false]
36
48
  s.rows[4].should eq ['texto', 'text', false]
@@ -68,8 +80,8 @@ describe Sheet do
68
80
  w.sheets[0].to_csv tmp_path
69
81
 
70
82
  csv = File.open(csv_file, 'r') { |f| f.readlines }
71
- csv[0].should eq "\"LevenshteinDistance\",\"0.0\"\n"
72
- csv[1].should eq "\"Case sensitive\",\"false\"\n"
83
+ csv[0].should eq %{"LevenshteinDistance","3.14","3","1970-01-01T01:00:00+00:00","1970-01-01T00:00:00+00:00","0.34028236692093801E39"\n}
84
+ csv[1].should eq %{"Case sensitive","false","3.0","1970-01-01T01:00:00+00:00"\n}
73
85
  csv[2].should eq "\"Fields\",\"Type\",\"URL Mining\"\n"
74
86
  csv[3].should eq "\"autor\",\"text\",\"false\"\n"
75
87
  csv[4].should eq "\"texto\",\"text\",\"false\"\n"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: saxlsx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Edgars Beigarts
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-11 00:00:00.000000000 Z
11
+ date: 2015-03-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rubyzip
@@ -119,6 +119,8 @@ files:
119
119
  - lib/saxlsx/sheet.rb
120
120
  - lib/saxlsx/sheet_collection.rb
121
121
  - lib/saxlsx/sheet_collection_parser.rb
122
+ - lib/saxlsx/style_collection.rb
123
+ - lib/saxlsx/style_collection_parser.rb
122
124
  - lib/saxlsx/version.rb
123
125
  - lib/saxlsx/workbook.rb
124
126
  - saxlsx.gemspec