roo 2.0.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,77 @@
1
+ require 'date'
2
+
3
module Roo
  class Excelx
    # One worksheet cell: the type-cast value together with the raw metadata
    # (formula, style, raw Excel type/value, coordinate) it was built from.
    class Cell
      # :type is intentionally not listed: #type below is the reader and
      # derives its answer from the formula and value.
      attr_reader :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
      attr_writer :value

      # value        - raw cell content; cast according to +type+
      # type         - symbol such as :float, :percentage, :date, :datetime, :time
      # formula      - formula text, or nil when the cell has none
      # excelx_type  - stored as-is and exposed through #excelx_type
      # excelx_value - stored as-is and exposed through #excelx_value
      # style        - stored as-is and exposed through #style
      # hyperlink    - link target or nil; when present the value is wrapped
      #                in a Roo::Link built from it and the value's string form
      # base_date    - date that serial day numbers are added to; only kept
      #                for :date and :datetime cells
      # coordinate   - stored as-is and exposed through #coordinate
      def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate)
        @type = type
        @formula = formula
        @base_date = base_date if [:date, :datetime].include?(@type)
        @excelx_type = excelx_type
        @excelx_value = excelx_value
        @style = style
        # Keep the raw target so the #hyperlink reader actually returns it.
        @hyperlink = hyperlink
        @value = type_cast_value(value)
        @value = Roo::Link.new(hyperlink, @value.to_s) if hyperlink
        @coordinate = coordinate
      end

      # Returns :formula when the cell has a formula, :link when its value is
      # a Roo::Link, and otherwise the type given to the constructor.
      def type
        if @formula
          :formula
        elsif @value.is_a?(Roo::Link)
          :link
        else
          @type
        end
      end

      # Row/column pair locating a cell.
      class Coordinate
        attr_accessor :row, :column

        def initialize(row, column)
          @row, @column = row, column
        end
      end

      private

      # Casts the raw value according to @type:
      #   :float, :percentage -> Float
      #   :date               -> Date at base date + whole days
      #   :datetime           -> DateTime at base date + fractional days
      #   :time               -> Float seconds (day fraction * 86_400)
      # Any other type returns the value untouched.
      def type_cast_value(value)
        case @type
        when :float, :percentage
          value.to_f
        when :date
          create_date(@base_date + value.to_i)
        when :datetime
          # Round the day fraction to 6 places before adding it.
          create_datetime(@base_date + value.to_f.round(6))
        when :time
          value.to_f * 86_400
        else
          value
        end
      end

      # Builds a plain Date from the year/month/day of +date+.
      def create_date(date)
        Date.new(date.year, date.month, date.day)
      end

      # Builds a DateTime from +date+, rounded to the nearest whole second.
      def create_datetime(date)
        datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
        t = round_datetime(datetime_string)

        DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
      end

      # Parses "YYYY-MM-DD HH:MM:SS.NNN" and returns a Time rounded to whole
      # seconds. The Time is built with a fixed +00:00 offset so the local
      # zone's DST rules cannot shift the wall-clock fields that are copied
      # into the DateTime afterwards.
      def round_datetime(datetime_string)
        /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+\.\d+)/ =~ datetime_string

        Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r, '+00:00').round(0)
      end
    end
  end
end
@@ -1,24 +1,22 @@
1
1
  require 'roo/excelx/extractor'
2
2
 
3
3
  module Roo
4
- class Excelx::Comments < Excelx::Extractor
4
+ class Excelx
5
+ class Comments < Excelx::Extractor
6
+ def comments
7
+ @comments ||= extract_comments
8
+ end
5
9
 
6
- def comments
7
- @comments ||= extract_comments
8
- end
10
+ private
9
11
 
10
- private
12
+ def extract_comments
13
+ return {} unless doc_exists?
11
14
 
12
- def extract_comments
13
- if doc_exists?
14
- Hash[doc.xpath("//comments/commentList/comment").map do |comment|
15
+ Hash[doc.xpath('//comments/commentList/comment').map do |comment|
15
16
  value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text
16
17
  [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
17
18
  end]
18
- else
19
- {}
20
19
  end
21
20
  end
22
-
23
21
  end
24
22
  end
@@ -1,20 +1,22 @@
1
1
  module Roo
2
- class Excelx::Extractor
3
- def initialize(path)
4
- @path = path
5
- end
2
+ class Excelx
3
+ class Extractor
4
+ def initialize(path)
5
+ @path = path
6
+ end
6
7
 
7
- private
8
+ private
8
9
 
9
- def doc
10
- @doc ||=
10
+ def doc
11
+ @doc ||=
11
12
  if doc_exists?
12
13
  ::Roo::Utils.load_xml(@path).remove_namespaces!
13
14
  end
14
- end
15
+ end
15
16
 
16
- def doc_exists?
17
- @path && File.exist?(@path)
17
+ def doc_exists?
18
+ @path && File.exist?(@path)
19
+ end
18
20
  end
19
21
  end
20
22
  end
@@ -1,26 +1,25 @@
1
1
  require 'roo/excelx/extractor'
2
2
 
3
3
  module Roo
4
- class Excelx::Relationships < Excelx::Extractor
5
- def [](index)
6
- to_a[index]
7
- end
4
+ class Excelx
5
+ class Relationships < Excelx::Extractor
6
+ def [](index)
7
+ to_a[index]
8
+ end
8
9
 
9
- def to_a
10
- @relationships ||= extract_relationships
11
- end
10
+ def to_a
11
+ @relationships ||= extract_relationships
12
+ end
13
+
14
+ private
12
15
 
13
- private
16
+ def extract_relationships
17
+ return [] unless doc_exists?
14
18
 
15
- def extract_relationships
16
- if doc_exists?
17
- Hash[doc.xpath("/Relationships/Relationship").map do |rel|
19
+ Hash[doc.xpath('/Relationships/Relationship').map do |rel|
18
20
  [rel.attribute('Id').text, rel]
19
21
  end]
20
- else
21
- []
22
22
  end
23
23
  end
24
-
25
24
  end
26
25
  end
@@ -1,40 +1,37 @@
1
1
  require 'roo/excelx/extractor'
2
2
 
3
3
  module Roo
4
- class Excelx::SharedStrings < Excelx::Extractor
5
- def [](index)
6
- to_a[index]
7
- end
4
+ class Excelx
5
+ class SharedStrings < Excelx::Extractor
6
+ def [](index)
7
+ to_a[index]
8
+ end
8
9
 
9
- def to_a
10
- @array ||= extract_shared_strings
11
- end
10
+ def to_a
11
+ @array ||= extract_shared_strings
12
+ end
13
+
14
+ private
12
15
 
13
- private
16
+ def extract_shared_strings
17
+ return [] unless doc_exists?
14
18
 
15
- def extract_shared_strings
16
- if doc_exists?
17
19
  # read the shared strings xml document
18
- doc.xpath("/sst/si").map do |si|
20
+ doc.xpath('/sst/si').map do |si|
19
21
  shared_string = ''
20
22
  si.children.each do |elem|
21
23
  case elem.name
22
- when 'r'
23
- elem.children.each do |r_elem|
24
- if r_elem.name == 't'
25
- shared_string << r_elem.content
26
- end
27
- end
28
- when 't'
29
- shared_string = elem.content
24
+ when 'r'
25
+ elem.children.each do |r_elem|
26
+ shared_string << r_elem.content if r_elem.name == 't'
27
+ end
28
+ when 't'
29
+ shared_string = elem.content
30
30
  end
31
31
  end
32
32
  shared_string
33
33
  end
34
- else
35
- []
36
34
  end
37
35
  end
38
-
39
36
  end
40
37
  end
@@ -0,0 +1,107 @@
1
module Roo
  class Excelx
    # One worksheet: combines the sheet document with its relationships,
    # comments and the workbook styles.
    class Sheet
      # Builds the readers for the given part paths. +styles+,
      # +shared_strings+, +workbook+ and +options+ are passed on to SheetDoc.
      def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook, options = {})
        @name = name
        @rels = Relationships.new(rels_path)
        @comments = Comments.new(comments_path)
        @styles = styles
        @sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook, options)
      end

      # Memoized cells of the sheet, as returned by the sheet document.
      def cells
        @cells ||= @sheet.cells(@rels)
      end

      # Memoized subset of #cells whose cell exists and has a truthy value.
      def present_cells
        @present_cells ||= cells.select { |_, cell| cell && cell.value }
      end

      # Yield each row as array of Excelx::Cell objects
      # accepts options max_rows (int) (offset by 1 for header),
      # pad_cells (boolean) and offset (int)
      #
      # The options hash is only read, never modified, so callers may pass a
      # frozen or shared hash.
      def each_row(options = {}, &block)
        offset = options[:offset] || 0
        max_rows = options[:max_rows]
        # The extra row is the header, which max_rows does not count.
        row_limit = max_rows && max_rows + offset + 1
        row_count = 0
        @sheet.each_row_streaming do |row|
          break if row_limit && row_count == row_limit
          block.call(cells_for_row_element(row, options)) if block && row_count >= offset
          row_count += 1
        end
      end

      # Values of the given row, from the first to the last non-empty
      # column; nil where there is no cell.
      def row(row_number)
        first_column.upto(last_column).map do |col|
          cell = cells[[row_number, col]]
          cell && cell.value
        end
      end

      # Values of the given column, from the first to the last non-empty
      # row; nil where there is no cell.
      def column(col_number)
        first_row.upto(last_row).map do |row|
          cell = cells[[row, col_number]]
          cell && cell.value
        end
      end

      # returns the number of the first non-empty row
      def first_row
        @first_row ||= present_cells.keys.map { |row, _| row }.min
      end

      # returns the number of the last non-empty row
      def last_row
        @last_row ||= present_cells.keys.map { |row, _| row }.max
      end

      # returns the number of the first non-empty column
      def first_column
        @first_column ||= present_cells.keys.map { |_, col| col }.min
      end

      # returns the number of the last non-empty column
      def last_column
        @last_column ||= present_cells.keys.map { |_, col| col }.max
      end

      # Style format of the cell at +key+ as a string; nil when there is no
      # such cell.
      def excelx_format(key)
        cell = cells[key]
        @styles.style_format(cell.style).to_s if cell
      end

      # Memoized hyperlinks of the sheet, as returned by the sheet document.
      def hyperlinks
        @hyperlinks ||= @sheet.hyperlinks(@rels)
      end

      # Comments of the sheet, as returned by the comments reader.
      def comments
        @comments.comments
      end

      # Dimensions of the sheet, as returned by the sheet document.
      def dimensions
        @sheet.dimensions
      end

      private

      # Take an xml row and return an array of Excelx::Cell objects
      # optionally pad array to header width(assumed 1st row).
      # takes option pad_cells (boolean) defaults false
      def cells_for_row_element(row_element, options = {})
        return [] unless row_element
        previous_column = 0
        row_cells = []
        @sheet.each_cell(row_element) do |cell|
          row_cells.concat(pad_cells(cell, previous_column)) if options[:pad_cells]
          row_cells << cell
          previous_column = cell.coordinate.column
        end
        row_cells
      end

      # Returns one nil per column skipped between +last_column+ and +cell+;
      # an empty array when nothing was skipped.
      def pad_cells(cell, last_column)
        gap = cell.coordinate.column - 1 - last_column
        Array.new([gap, 0].max)
      end
    end
  end
end
@@ -1,96 +1,93 @@
1
1
  require 'roo/excelx/extractor'
2
2
 
3
3
  module Roo
4
- class Excelx::SheetDoc < Excelx::Extractor
5
- def initialize(path, relationships, styles, shared_strings, workbook, options = {})
6
- super(path)
7
- @options = options
8
- @relationships = relationships
9
- @styles = styles
10
- @shared_strings = shared_strings
11
- @workbook = workbook
12
- end
4
+ class Excelx
5
+ class SheetDoc < Excelx::Extractor
6
+ def initialize(path, relationships, styles, shared_strings, workbook, options = {})
7
+ super(path)
8
+ @options = options
9
+ @relationships = relationships
10
+ @styles = styles
11
+ @shared_strings = shared_strings
12
+ @workbook = workbook
13
+ end
13
14
 
14
- def cells(relationships)
15
- @cells ||= extract_cells(relationships)
16
- end
15
+ def cells(relationships)
16
+ @cells ||= extract_cells(relationships)
17
+ end
17
18
 
18
- def hyperlinks(relationships)
19
- @hyperlinks ||= extract_hyperlinks(relationships)
20
- end
19
+ def hyperlinks(relationships)
20
+ @hyperlinks ||= extract_hyperlinks(relationships)
21
+ end
21
22
 
22
- # Get the dimensions for the sheet.
23
- # This is the upper bound of cells that might
24
- # be parsed. (the document may be sparse so cell count is only upper bound)
25
- def dimensions
26
- @dimensions ||= extract_dimensions
27
- end
23
+ # Get the dimensions for the sheet.
24
+ # This is the upper bound of cells that might
25
+ # be parsed. (the document may be sparse so cell count is only upper bound)
26
+ def dimensions
27
+ @dimensions ||= extract_dimensions
28
+ end
28
29
 
29
- # Yield each row xml element to caller
30
- def each_row_streaming(&block)
31
- Roo::Utils.each_element(@path, 'row', &block)
32
- end
30
+ # Yield each row xml element to caller
31
+ def each_row_streaming(&block)
32
+ Roo::Utils.each_element(@path, 'row', &block)
33
+ end
33
34
 
34
- # Yield each cell as Excelx::Cell to caller for given
35
- # row xml
36
- def each_cell(row_xml)
37
- return [] unless row_xml
38
- row_xml.children.each do |cell_element|
39
- key = ::Roo::Utils.ref_to_key(cell_element['r'])
40
- yield cell_from_xml(cell_element, hyperlinks(@relationships)[key])
35
+ # Yield each cell as Excelx::Cell to caller for given
36
+ # row xml
37
+ def each_cell(row_xml)
38
+ return [] unless row_xml
39
+ row_xml.children.each do |cell_element|
40
+ key = ::Roo::Utils.ref_to_key(cell_element['r'])
41
+ yield cell_from_xml(cell_element, hyperlinks(@relationships)[key])
42
+ end
41
43
  end
42
- end
43
44
 
44
- private
45
+ private
45
46
 
46
- def cell_from_xml(cell_xml, hyperlink)
47
- # This is error prone, to_i will silently turn a nil into a 0
48
- # and it works by coincidence that Format[0] is general
49
- style = cell_xml['s'].to_i # should be here
50
- # c: <c r="A5" s="2">
51
- # <v>22606</v>
52
- # </c>, format: , tmp_type: float
53
- value_type =
47
+ def cell_from_xml(cell_xml, hyperlink)
48
+ # This is error prone, to_i will silently turn a nil into a 0
49
+ # and it works by coincidence that Format[0] is general
50
+ style = cell_xml['s'].to_i # should be here
51
+ # c: <c r="A5" s="2">
52
+ # <v>22606</v>
53
+ # </c>, format: , tmp_type: float
54
+ value_type =
54
55
  case cell_xml['t']
55
56
  when 's'
56
57
  :shared
57
58
  when 'b'
58
59
  :boolean
59
- # 2011-02-25 BEGIN
60
60
  when 'str'
61
61
  :string
62
- # 2011-02-25 END
63
- # 2011-09-15 BEGIN
64
62
  when 'inlineStr'
65
63
  :inlinestr
66
- # 2011-09-15 END
67
64
  else
68
65
  format = @styles.style_format(style)
69
66
  Excelx::Format.to_type(format)
70
67
  end
71
- formula = nil
72
- row, column = ::Roo::Utils.split_coordinate(cell_xml['r'])
73
- cell_xml.children.each do |cell|
74
- case cell.name
75
- when 'is'
76
- cell.children.each do |inline_str|
77
- if inline_str.name == 't'
78
- return Excelx::Cell.new(inline_str.content,:string,formula,:string,inline_str.content,style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
68
+ formula = nil
69
+ row, column = ::Roo::Utils.split_coordinate(cell_xml['r'])
70
+ cell_xml.children.each do |cell|
71
+ case cell.name
72
+ when 'is'
73
+ cell.children.each do |inline_str|
74
+ if inline_str.name == 't'
75
+ return Excelx::Cell.new(inline_str.content, :string, formula, :string, inline_str.content, style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
76
+ end
79
77
  end
80
- end
81
- when 'f'
82
- formula = cell.content
83
- when 'v'
84
- if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0
85
- value_type =
78
+ when 'f'
79
+ formula = cell.content
80
+ when 'v'
81
+ if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0
82
+ value_type =
86
83
  if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
87
84
  :datetime
88
85
  else
89
86
  :date
90
87
  end
91
- end
92
- excelx_type = [:numeric_or_formula,format.to_s]
93
- value =
88
+ end
89
+ excelx_type = [:numeric_or_formula, format.to_s]
90
+ value =
94
91
  case value_type
95
92
  when :shared
96
93
  value_type = :string
@@ -109,54 +106,54 @@ module Roo
109
106
  value_type = :float
110
107
  cell.content
111
108
  end
112
- return Excelx::Cell.new(value,value_type,formula,excelx_type,cell.content,style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
109
+ return Excelx::Cell.new(value, value_type, formula, excelx_type, cell.content, style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
110
+ end
113
111
  end
112
+ Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, Excelx::Cell::Coordinate.new(row, column))
114
113
  end
115
- Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, Excelx::Cell::Coordinate.new(row, column))
116
- end
117
114
 
118
- def extract_hyperlinks(relationships)
119
- Hash[doc.xpath("/worksheet/hyperlinks/hyperlink").map do |hyperlink|
120
- if hyperlink.attribute('id') && relationship = relationships[hyperlink.attribute('id').text]
121
- [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
122
- end
123
- end.compact]
124
- end
115
+ def extract_hyperlinks(relationships)
116
+ Hash[doc.xpath('/worksheet/hyperlinks/hyperlink').map do |hyperlink|
117
+ if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text])
118
+ [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
119
+ end
120
+ end.compact]
121
+ end
125
122
 
126
- def expand_merged_ranges(cells)
127
- # Extract merged ranges from xml
128
- merges = {}
129
- doc.xpath("/worksheet/mergeCells/mergeCell").each do |mergecell_xml|
130
- tl, br = mergecell_xml['ref'].split(/:/).map {|ref| ::Roo::Utils.ref_to_key(ref)}
131
- for row in tl[0]..br[0] do
132
- for col in tl[1]..br[1] do
133
- next if row == tl[0] && col == tl[1]
134
- merges[[row,col]] = tl
123
+ def expand_merged_ranges(cells)
124
+ # Extract merged ranges from xml
125
+ merges = {}
126
+ doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
127
+ tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
128
+ for row in tl[0]..br[0] do
129
+ for col in tl[1]..br[1] do
130
+ next if row == tl[0] && col == tl[1]
131
+ merges[[row, col]] = tl
132
+ end
135
133
  end
136
134
  end
135
+ # Duplicate value into all cells in merged range
136
+ merges.each do |dst, src|
137
+ cells[dst] = cells[src]
138
+ end
137
139
  end
138
- # Duplicate value into all cells in merged range
139
- merges.each do |dst, src|
140
- cells[dst] = cells[src]
141
- end
142
- end
143
140
 
144
- def extract_cells(relationships)
145
- extracted_cells = Hash[doc.xpath("/worksheet/sheetData/row/c").map do |cell_xml|
146
- key = ::Roo::Utils.ref_to_key(cell_xml['r'])
147
- [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
148
- end]
149
- if @options[:expand_merged_ranges]
150
- expand_merged_ranges(extracted_cells)
141
+ def extract_cells(relationships)
142
+ extracted_cells = Hash[doc.xpath('/worksheet/sheetData/row/c').map do |cell_xml|
143
+ key = ::Roo::Utils.ref_to_key(cell_xml['r'])
144
+ [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
145
+ end]
146
+
147
+ expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges]
148
+
149
+ extracted_cells
151
150
  end
152
- extracted_cells
153
- end
154
151
 
155
- def extract_dimensions
156
- Roo::Utils.each_element(@path, 'dimension') do |dimension|
157
- return dimension.attributes["ref"].value
152
+ def extract_dimensions
153
+ Roo::Utils.each_element(@path, 'dimension') do |dimension|
154
+ return dimension.attributes['ref'].value
155
+ end
158
156
  end
159
- end
160
157
 
161
158
  =begin
162
159
  Datei xl/comments1.xml
@@ -198,5 +195,6 @@ Datei xl/comments1.xml
198
195
  read_comments(sheet)
199
196
  end
200
197
  =end
198
+ end
201
199
  end
202
200
  end