roo 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,77 @@
1
+ require 'date'
2
+
3
module Roo
  class Excelx
    # A single worksheet cell: the type-cast value plus the raw metadata
    # (formula, style index, raw xlsx type/value, coordinate) it was built from.
    class Cell
      # +type+ is deliberately absent from this list: it is defined explicitly
      # below, and listing it here only triggered a "method redefined" warning.
      attr_reader :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
      attr_writer :value

      # value        - raw cell content (usually a String)
      # type         - Symbol selecting the cast applied to +value+
      #                (:float, :percentage, :date, :datetime, :time; anything
      #                else leaves +value+ untouched)
      # formula      - formula text, or nil
      # excelx_type  - raw type information, stored as given
      # excelx_value - raw value, stored as given
      # style        - style index, stored as given
      # hyperlink    - link target; when present the value is wrapped in a
      #                Roo::Link built from it
      # base_date    - Date that serial 0 refers to; only kept for :date and
      #                :datetime cells
      # coordinate   - Cell::Coordinate of this cell
      def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate)
        @type = type
        @formula = formula
        @base_date = base_date if [:date, :datetime].include?(@type)
        @excelx_type = excelx_type
        @excelx_value = excelx_value
        @style = style
        # Previously never assigned, so the #hyperlink reader always returned nil.
        @hyperlink = hyperlink
        @value = type_cast_value(value)
        @value = Roo::Link.new(hyperlink, @value.to_s) if hyperlink
        @coordinate = coordinate
      end

      # Effective type: :formula when a formula is present, :link when the
      # value was wrapped in a Roo::Link, otherwise the type passed to #new.
      def type
        if @formula
          :formula
        elsif @value.is_a?(Roo::Link)
          :link
        else
          @type
        end
      end

      # Row/column pair locating a cell.
      class Coordinate
        attr_accessor :row, :column

        def initialize(row, column)
          @row = row
          @column = column
        end
      end

      private

      # Converts the raw value according to @type.
      def type_cast_value(value)
        case @type
        when :float, :percentage
          value.to_f
        when :date
          create_date(@base_date + value.to_i)
        when :datetime
          # Rounded to 6 decimals before being added to the base date.
          create_datetime(@base_date + value.to_f.round(6))
        when :time
          # Fraction of a day expressed in seconds.
          value.to_f * 86_400
        else
          value
        end
      end

      # Returns a plain Date carrying only the year/month/day of +date+.
      def create_date(date)
        Date.new(date.year, date.month, date.day)
      end

      # Returns a DateTime for +date+ rounded to the nearest whole second.
      def create_datetime(date)
        datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
        t = round_datetime(datetime_string)

        DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
      end

      # Parses "YYYY-MM-DD HH:MM:SS.NNNNNNNNN" and rounds it to whole seconds.
      #
      # The Time is built in UTC on purpose. A spreadsheet timestamp has no
      # time zone, and building it in the process-local zone silently moved
      # wall-clock times that fall inside a DST gap (e.g. 02:30 became 03:30).
      def round_datetime(datetime_string)
        # The regexp literal must stay on the left of =~ so the named groups
        # become local variables.
        /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+\.\d+)/ =~ datetime_string

        Time.utc(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0)
      end
    end
  end
end
@@ -1,24 +1,22 @@
1
1
  require 'roo/excelx/extractor'
2
2
 
3
3
  module Roo
4
- class Excelx::Comments < Excelx::Extractor
4
+ class Excelx
5
+ class Comments < Excelx::Extractor
6
+ def comments
7
+ @comments ||= extract_comments
8
+ end
5
9
 
6
- def comments
7
- @comments ||= extract_comments
8
- end
10
+ private
9
11
 
10
- private
12
+ def extract_comments
13
+ return {} unless doc_exists?
11
14
 
12
- def extract_comments
13
- if doc_exists?
14
- Hash[doc.xpath("//comments/commentList/comment").map do |comment|
15
+ Hash[doc.xpath('//comments/commentList/comment').map do |comment|
15
16
  value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text
16
17
  [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
17
18
  end]
18
- else
19
- {}
20
19
  end
21
20
  end
22
-
23
21
  end
24
22
  end
@@ -1,20 +1,22 @@
1
1
  module Roo
2
- class Excelx::Extractor
3
- def initialize(path)
4
- @path = path
5
- end
2
+ class Excelx
3
+ class Extractor
4
+ def initialize(path)
5
+ @path = path
6
+ end
6
7
 
7
- private
8
+ private
8
9
 
9
- def doc
10
- @doc ||=
10
+ def doc
11
+ @doc ||=
11
12
  if doc_exists?
12
13
  ::Roo::Utils.load_xml(@path).remove_namespaces!
13
14
  end
14
- end
15
+ end
15
16
 
16
- def doc_exists?
17
- @path && File.exist?(@path)
17
+ def doc_exists?
18
+ @path && File.exist?(@path)
19
+ end
18
20
  end
19
21
  end
20
22
  end
@@ -1,26 +1,25 @@
1
1
  require 'roo/excelx/extractor'
2
2
 
3
3
  module Roo
4
- class Excelx::Relationships < Excelx::Extractor
5
- def [](index)
6
- to_a[index]
7
- end
4
+ class Excelx
5
+ class Relationships < Excelx::Extractor
6
+ def [](index)
7
+ to_a[index]
8
+ end
8
9
 
9
- def to_a
10
- @relationships ||= extract_relationships
11
- end
10
+ def to_a
11
+ @relationships ||= extract_relationships
12
+ end
13
+
14
+ private
12
15
 
13
- private
16
+ def extract_relationships
17
+ return [] unless doc_exists?
14
18
 
15
- def extract_relationships
16
- if doc_exists?
17
- Hash[doc.xpath("/Relationships/Relationship").map do |rel|
19
+ Hash[doc.xpath('/Relationships/Relationship').map do |rel|
18
20
  [rel.attribute('Id').text, rel]
19
21
  end]
20
- else
21
- []
22
22
  end
23
23
  end
24
-
25
24
  end
26
25
  end
@@ -1,40 +1,37 @@
1
1
  require 'roo/excelx/extractor'
2
2
 
3
3
  module Roo
4
- class Excelx::SharedStrings < Excelx::Extractor
5
- def [](index)
6
- to_a[index]
7
- end
4
+ class Excelx
5
+ class SharedStrings < Excelx::Extractor
6
+ def [](index)
7
+ to_a[index]
8
+ end
8
9
 
9
- def to_a
10
- @array ||= extract_shared_strings
11
- end
10
+ def to_a
11
+ @array ||= extract_shared_strings
12
+ end
13
+
14
+ private
12
15
 
13
- private
16
+ def extract_shared_strings
17
+ return [] unless doc_exists?
14
18
 
15
- def extract_shared_strings
16
- if doc_exists?
17
19
  # read the shared strings xml document
18
- doc.xpath("/sst/si").map do |si|
20
+ doc.xpath('/sst/si').map do |si|
19
21
  shared_string = ''
20
22
  si.children.each do |elem|
21
23
  case elem.name
22
- when 'r'
23
- elem.children.each do |r_elem|
24
- if r_elem.name == 't'
25
- shared_string << r_elem.content
26
- end
27
- end
28
- when 't'
29
- shared_string = elem.content
24
+ when 'r'
25
+ elem.children.each do |r_elem|
26
+ shared_string << r_elem.content if r_elem.name == 't'
27
+ end
28
+ when 't'
29
+ shared_string = elem.content
30
30
  end
31
31
  end
32
32
  shared_string
33
33
  end
34
- else
35
- []
36
34
  end
37
35
  end
38
-
39
36
  end
40
37
  end
@@ -0,0 +1,107 @@
1
module Roo
  class Excelx
    # One worksheet: combines the sheet document with its relationships,
    # comments and styles.
    class Sheet
      def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook, options = {})
        @name = name
        @rels = Relationships.new(rels_path)
        @comments = Comments.new(comments_path)
        @styles = styles
        @sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook, options)
      end

      # All cells of the sheet keyed by [row, column] (memoised).
      def cells
        @cells ||= @sheet.cells(@rels)
      end

      # The subset of #cells that have a truthy value (memoised).
      def present_cells
        @present_cells ||= cells.select { |_, cell| cell && cell.value }
      end

      # Yields each row as an Array of cell objects.
      #
      # options:
      #   :max_rows  - (Integer) number of rows to yield after the first one;
      #                iteration stops after max_rows + offset + 1 rows
      #   :offset    - (Integer) number of leading rows to skip, default 0
      #   :pad_cells - (Boolean) fill column gaps with nil
      #
      # The options Hash is only read. It used to get :offset written into
      # it, which leaked into the caller's Hash and raised on a frozen one.
      def each_row(options = {}, &block)
        offset = options[:offset] || 0
        max_rows = options[:max_rows]
        stop_at = max_rows && (max_rows + offset + 1)

        row_count = 0
        @sheet.each_row_streaming do |row|
          break if stop_at && row_count == stop_at
          block.call(cells_for_row_element(row, options)) if block && row_count >= offset
          row_count += 1
        end
      end

      # Values of row +row_number+ from the first to the last used column.
      def row(row_number)
        first_column.upto(last_column).map do |col|
          cell = cells[[row_number, col]]
          cell && cell.value
        end
      end

      # Values of column +col_number+ from the first to the last used row.
      def column(col_number)
        first_row.upto(last_row).map do |row|
          cell = cells[[row, col_number]]
          cell && cell.value
        end
      end

      # returns the number of the first non-empty row
      def first_row
        @first_row ||= present_cells.keys.map { |row, _| row }.min
      end

      # returns the number of the last non-empty row
      def last_row
        @last_row ||= present_cells.keys.map { |row, _| row }.max
      end

      # returns the number of the first non-empty column
      def first_column
        @first_column ||= present_cells.keys.map { |_, col| col }.min
      end

      # returns the number of the last non-empty column
      def last_column
        @last_column ||= present_cells.keys.map { |_, col| col }.max
      end

      # Style format of the cell at +key+ as a String; nil when there is no
      # such cell.
      def excelx_format(key)
        cell = cells[key]
        @styles.style_format(cell.style).to_s if cell
      end

      def hyperlinks
        @hyperlinks ||= @sheet.hyperlinks(@rels)
      end

      def comments
        @comments.comments
      end

      def dimensions
        @sheet.dimensions
      end

      private

      # Takes an xml row and returns an Array of cell objects. With
      # options[:pad_cells] the gaps before each cell are filled with nil so
      # that positions line up with column numbers.
      def cells_for_row_element(row_element, options = {})
        return [] unless row_element

        previous_column = 0
        row_cells = []
        @sheet.each_cell(row_element) do |cell|
          row_cells.concat(pad_cells(cell, previous_column)) if options[:pad_cells]
          row_cells << cell
          previous_column = cell.coordinate.column
        end
        row_cells
      end

      # Returns one nil per empty column between +last_column+ and +cell+.
      def pad_cells(cell, last_column)
        gap = cell.coordinate.column - 1 - last_column
        Array.new(gap > 0 ? gap : 0)
      end
    end
  end
end
@@ -1,96 +1,93 @@
1
1
  require 'roo/excelx/extractor'
2
2
 
3
3
  module Roo
4
- class Excelx::SheetDoc < Excelx::Extractor
5
- def initialize(path, relationships, styles, shared_strings, workbook, options = {})
6
- super(path)
7
- @options = options
8
- @relationships = relationships
9
- @styles = styles
10
- @shared_strings = shared_strings
11
- @workbook = workbook
12
- end
4
+ class Excelx
5
+ class SheetDoc < Excelx::Extractor
6
+ def initialize(path, relationships, styles, shared_strings, workbook, options = {})
7
+ super(path)
8
+ @options = options
9
+ @relationships = relationships
10
+ @styles = styles
11
+ @shared_strings = shared_strings
12
+ @workbook = workbook
13
+ end
13
14
 
14
- def cells(relationships)
15
- @cells ||= extract_cells(relationships)
16
- end
15
+ def cells(relationships)
16
+ @cells ||= extract_cells(relationships)
17
+ end
17
18
 
18
- def hyperlinks(relationships)
19
- @hyperlinks ||= extract_hyperlinks(relationships)
20
- end
19
+ def hyperlinks(relationships)
20
+ @hyperlinks ||= extract_hyperlinks(relationships)
21
+ end
21
22
 
22
- # Get the dimensions for the sheet.
23
- # This is the upper bound of cells that might
24
- # be parsed. (the document may be sparse so cell count is only upper bound)
25
- def dimensions
26
- @dimensions ||= extract_dimensions
27
- end
23
+ # Get the dimensions for the sheet.
24
+ # This is the upper bound of cells that might
25
+ # be parsed. (the document may be sparse so cell count is only upper bound)
26
+ def dimensions
27
+ @dimensions ||= extract_dimensions
28
+ end
28
29
 
29
- # Yield each row xml element to caller
30
- def each_row_streaming(&block)
31
- Roo::Utils.each_element(@path, 'row', &block)
32
- end
30
+ # Yield each row xml element to caller
31
+ def each_row_streaming(&block)
32
+ Roo::Utils.each_element(@path, 'row', &block)
33
+ end
33
34
 
34
- # Yield each cell as Excelx::Cell to caller for given
35
- # row xml
36
- def each_cell(row_xml)
37
- return [] unless row_xml
38
- row_xml.children.each do |cell_element|
39
- key = ::Roo::Utils.ref_to_key(cell_element['r'])
40
- yield cell_from_xml(cell_element, hyperlinks(@relationships)[key])
35
+ # Yield each cell as Excelx::Cell to caller for given
36
+ # row xml
37
+ def each_cell(row_xml)
38
+ return [] unless row_xml
39
+ row_xml.children.each do |cell_element|
40
+ key = ::Roo::Utils.ref_to_key(cell_element['r'])
41
+ yield cell_from_xml(cell_element, hyperlinks(@relationships)[key])
42
+ end
41
43
  end
42
- end
43
44
 
44
- private
45
+ private
45
46
 
46
- def cell_from_xml(cell_xml, hyperlink)
47
- # This is error prone, to_i will silently turn a nil into a 0
48
- # and it works by coincidence that Format[0] is general
49
- style = cell_xml['s'].to_i # should be here
50
- # c: <c r="A5" s="2">
51
- # <v>22606</v>
52
- # </c>, format: , tmp_type: float
53
- value_type =
47
+ def cell_from_xml(cell_xml, hyperlink)
48
+ # This is error prone, to_i will silently turn a nil into a 0
49
+ # and it works by coincidence that Format[0] is general
50
+ style = cell_xml['s'].to_i # should be here
51
+ # c: <c r="A5" s="2">
52
+ # <v>22606</v>
53
+ # </c>, format: , tmp_type: float
54
+ value_type =
54
55
  case cell_xml['t']
55
56
  when 's'
56
57
  :shared
57
58
  when 'b'
58
59
  :boolean
59
- # 2011-02-25 BEGIN
60
60
  when 'str'
61
61
  :string
62
- # 2011-02-25 END
63
- # 2011-09-15 BEGIN
64
62
  when 'inlineStr'
65
63
  :inlinestr
66
- # 2011-09-15 END
67
64
  else
68
65
  format = @styles.style_format(style)
69
66
  Excelx::Format.to_type(format)
70
67
  end
71
- formula = nil
72
- row, column = ::Roo::Utils.split_coordinate(cell_xml['r'])
73
- cell_xml.children.each do |cell|
74
- case cell.name
75
- when 'is'
76
- cell.children.each do |inline_str|
77
- if inline_str.name == 't'
78
- return Excelx::Cell.new(inline_str.content,:string,formula,:string,inline_str.content,style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
68
+ formula = nil
69
+ row, column = ::Roo::Utils.split_coordinate(cell_xml['r'])
70
+ cell_xml.children.each do |cell|
71
+ case cell.name
72
+ when 'is'
73
+ cell.children.each do |inline_str|
74
+ if inline_str.name == 't'
75
+ return Excelx::Cell.new(inline_str.content, :string, formula, :string, inline_str.content, style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
76
+ end
79
77
  end
80
- end
81
- when 'f'
82
- formula = cell.content
83
- when 'v'
84
- if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0
85
- value_type =
78
+ when 'f'
79
+ formula = cell.content
80
+ when 'v'
81
+ if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0
82
+ value_type =
86
83
  if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
87
84
  :datetime
88
85
  else
89
86
  :date
90
87
  end
91
- end
92
- excelx_type = [:numeric_or_formula,format.to_s]
93
- value =
88
+ end
89
+ excelx_type = [:numeric_or_formula, format.to_s]
90
+ value =
94
91
  case value_type
95
92
  when :shared
96
93
  value_type = :string
@@ -109,54 +106,54 @@ module Roo
109
106
  value_type = :float
110
107
  cell.content
111
108
  end
112
- return Excelx::Cell.new(value,value_type,formula,excelx_type,cell.content,style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
109
+ return Excelx::Cell.new(value, value_type, formula, excelx_type, cell.content, style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
110
+ end
113
111
  end
112
+ Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, Excelx::Cell::Coordinate.new(row, column))
114
113
  end
115
- Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, Excelx::Cell::Coordinate.new(row, column))
116
- end
117
114
 
118
- def extract_hyperlinks(relationships)
119
- Hash[doc.xpath("/worksheet/hyperlinks/hyperlink").map do |hyperlink|
120
- if hyperlink.attribute('id') && relationship = relationships[hyperlink.attribute('id').text]
121
- [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
122
- end
123
- end.compact]
124
- end
115
+ def extract_hyperlinks(relationships)
116
+ Hash[doc.xpath('/worksheet/hyperlinks/hyperlink').map do |hyperlink|
117
+ if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text])
118
+ [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
119
+ end
120
+ end.compact]
121
+ end
125
122
 
126
- def expand_merged_ranges(cells)
127
- # Extract merged ranges from xml
128
- merges = {}
129
- doc.xpath("/worksheet/mergeCells/mergeCell").each do |mergecell_xml|
130
- tl, br = mergecell_xml['ref'].split(/:/).map {|ref| ::Roo::Utils.ref_to_key(ref)}
131
- for row in tl[0]..br[0] do
132
- for col in tl[1]..br[1] do
133
- next if row == tl[0] && col == tl[1]
134
- merges[[row,col]] = tl
123
+ def expand_merged_ranges(cells)
124
+ # Extract merged ranges from xml
125
+ merges = {}
126
+ doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
127
+ tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
128
+ for row in tl[0]..br[0] do
129
+ for col in tl[1]..br[1] do
130
+ next if row == tl[0] && col == tl[1]
131
+ merges[[row, col]] = tl
132
+ end
135
133
  end
136
134
  end
135
+ # Duplicate value into all cells in merged range
136
+ merges.each do |dst, src|
137
+ cells[dst] = cells[src]
138
+ end
137
139
  end
138
- # Duplicate value into all cells in merged range
139
- merges.each do |dst, src|
140
- cells[dst] = cells[src]
141
- end
142
- end
143
140
 
144
- def extract_cells(relationships)
145
- extracted_cells = Hash[doc.xpath("/worksheet/sheetData/row/c").map do |cell_xml|
146
- key = ::Roo::Utils.ref_to_key(cell_xml['r'])
147
- [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
148
- end]
149
- if @options[:expand_merged_ranges]
150
- expand_merged_ranges(extracted_cells)
141
+ def extract_cells(relationships)
142
+ extracted_cells = Hash[doc.xpath('/worksheet/sheetData/row/c').map do |cell_xml|
143
+ key = ::Roo::Utils.ref_to_key(cell_xml['r'])
144
+ [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
145
+ end]
146
+
147
+ expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges]
148
+
149
+ extracted_cells
151
150
  end
152
- extracted_cells
153
- end
154
151
 
155
- def extract_dimensions
156
- Roo::Utils.each_element(@path, 'dimension') do |dimension|
157
- return dimension.attributes["ref"].value
152
+ def extract_dimensions
153
+ Roo::Utils.each_element(@path, 'dimension') do |dimension|
154
+ return dimension.attributes['ref'].value
155
+ end
158
156
  end
159
- end
160
157
 
161
158
  =begin
162
159
  Datei xl/comments1.xml
@@ -198,5 +195,6 @@ Datei xl/comments1.xml
198
195
  read_comments(sheet)
199
196
  end
200
197
  =end
198
+ end
201
199
  end
202
200
  end