roo 1.13.2 → 2.0.0beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (171) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +7 -0
  3. data/.simplecov +4 -0
  4. data/.travis.yml +13 -0
  5. data/CHANGELOG +21 -0
  6. data/Gemfile +16 -10
  7. data/Guardfile +24 -0
  8. data/LICENSE +3 -1
  9. data/README.md +254 -0
  10. data/Rakefile +23 -23
  11. data/examples/roo_soap_client.rb +28 -31
  12. data/examples/roo_soap_server.rb +4 -6
  13. data/examples/write_me.rb +9 -10
  14. data/lib/roo.rb +18 -24
  15. data/lib/roo/base.rb +303 -388
  16. data/lib/roo/csv.rb +120 -113
  17. data/lib/roo/excelx.rb +452 -484
  18. data/lib/roo/excelx/comments.rb +24 -0
  19. data/lib/roo/excelx/extractor.rb +20 -0
  20. data/lib/roo/excelx/relationships.rb +26 -0
  21. data/lib/roo/excelx/shared_strings.rb +40 -0
  22. data/lib/roo/excelx/sheet_doc.rb +202 -0
  23. data/lib/roo/excelx/styles.rb +62 -0
  24. data/lib/roo/excelx/workbook.rb +59 -0
  25. data/lib/roo/font.rb +17 -0
  26. data/lib/roo/libre_office.rb +5 -0
  27. data/lib/roo/link.rb +15 -0
  28. data/lib/roo/{openoffice.rb → open_office.rb} +678 -496
  29. data/lib/roo/spreadsheet.rb +20 -23
  30. data/lib/roo/utils.rb +78 -0
  31. data/lib/roo/version.rb +3 -0
  32. data/roo.gemspec +20 -204
  33. data/spec/lib/roo/base_spec.rb +1 -4
  34. data/spec/lib/roo/csv_spec.rb +21 -13
  35. data/spec/lib/roo/excelx/format_spec.rb +7 -6
  36. data/spec/lib/roo/excelx_spec.rb +388 -11
  37. data/spec/lib/roo/libreoffice_spec.rb +16 -6
  38. data/spec/lib/roo/openoffice_spec.rb +2 -8
  39. data/spec/lib/roo/spreadsheet_spec.rb +40 -12
  40. data/spec/lib/roo/utils_spec.rb +106 -0
  41. data/spec/spec_helper.rb +2 -1
  42. data/test/test_generic_spreadsheet.rb +19 -67
  43. data/test/test_helper.rb +9 -56
  44. data/test/test_roo.rb +252 -477
  45. metadata +63 -302
  46. data/Gemfile.lock +0 -78
  47. data/README.markdown +0 -126
  48. data/VERSION +0 -1
  49. data/lib/roo/excel.rb +0 -355
  50. data/lib/roo/excel2003xml.rb +0 -300
  51. data/lib/roo/google.rb +0 -292
  52. data/lib/roo/roo_rails_helper.rb +0 -83
  53. data/lib/roo/worksheet.rb +0 -18
  54. data/spec/lib/roo/excel2003xml_spec.rb +0 -15
  55. data/spec/lib/roo/excel_spec.rb +0 -17
  56. data/spec/lib/roo/google_spec.rb +0 -64
  57. data/test/files/1900_base.xls +0 -0
  58. data/test/files/1900_base.xlsx +0 -0
  59. data/test/files/1904_base.xls +0 -0
  60. data/test/files/1904_base.xlsx +0 -0
  61. data/test/files/Bibelbund.csv +0 -3741
  62. data/test/files/Bibelbund.ods +0 -0
  63. data/test/files/Bibelbund.xls +0 -0
  64. data/test/files/Bibelbund.xlsx +0 -0
  65. data/test/files/Bibelbund.xml +0 -62518
  66. data/test/files/Bibelbund1.ods +0 -0
  67. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  68. data/test/files/bad_excel_date.xls +0 -0
  69. data/test/files/bbu.ods +0 -0
  70. data/test/files/bbu.xls +0 -0
  71. data/test/files/bbu.xlsx +0 -0
  72. data/test/files/bbu.xml +0 -152
  73. data/test/files/bode-v1.ods.zip +0 -0
  74. data/test/files/bode-v1.xls.zip +0 -0
  75. data/test/files/boolean.csv +0 -2
  76. data/test/files/boolean.ods +0 -0
  77. data/test/files/boolean.xls +0 -0
  78. data/test/files/boolean.xlsx +0 -0
  79. data/test/files/boolean.xml +0 -112
  80. data/test/files/borders.ods +0 -0
  81. data/test/files/borders.xls +0 -0
  82. data/test/files/borders.xlsx +0 -0
  83. data/test/files/borders.xml +0 -144
  84. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  85. data/test/files/bug-row-column-fixnum-float.xls +0 -0
  86. data/test/files/bug-row-column-fixnum-float.xml +0 -127
  87. data/test/files/comments.ods +0 -0
  88. data/test/files/comments.xls +0 -0
  89. data/test/files/comments.xlsx +0 -0
  90. data/test/files/csvtypes.csv +0 -1
  91. data/test/files/datetime.ods +0 -0
  92. data/test/files/datetime.xls +0 -0
  93. data/test/files/datetime.xlsx +0 -0
  94. data/test/files/datetime.xml +0 -142
  95. data/test/files/datetime_floatconv.xls +0 -0
  96. data/test/files/datetime_floatconv.xml +0 -148
  97. data/test/files/dreimalvier.ods +0 -0
  98. data/test/files/emptysheets.ods +0 -0
  99. data/test/files/emptysheets.xls +0 -0
  100. data/test/files/emptysheets.xlsx +0 -0
  101. data/test/files/emptysheets.xml +0 -105
  102. data/test/files/excel2003.xml +0 -21140
  103. data/test/files/false_encoding.xls +0 -0
  104. data/test/files/false_encoding.xml +0 -132
  105. data/test/files/file_item_error.xlsx +0 -0
  106. data/test/files/formula.ods +0 -0
  107. data/test/files/formula.xls +0 -0
  108. data/test/files/formula.xlsx +0 -0
  109. data/test/files/formula.xml +0 -134
  110. data/test/files/formula_parse_error.xls +0 -0
  111. data/test/files/formula_parse_error.xml +0 -1833
  112. data/test/files/formula_string_error.xlsx +0 -0
  113. data/test/files/html-escape.ods +0 -0
  114. data/test/files/link.xls +0 -0
  115. data/test/files/link.xlsx +0 -0
  116. data/test/files/matrix.ods +0 -0
  117. data/test/files/matrix.xls +0 -0
  118. data/test/files/named_cells.ods +0 -0
  119. data/test/files/named_cells.xls +0 -0
  120. data/test/files/named_cells.xlsx +0 -0
  121. data/test/files/no_spreadsheet_file.txt +0 -1
  122. data/test/files/numbers1.csv +0 -18
  123. data/test/files/numbers1.ods +0 -0
  124. data/test/files/numbers1.xls +0 -0
  125. data/test/files/numbers1.xlsx +0 -0
  126. data/test/files/numbers1.xml +0 -312
  127. data/test/files/numeric-link.xlsx +0 -0
  128. data/test/files/only_one_sheet.ods +0 -0
  129. data/test/files/only_one_sheet.xls +0 -0
  130. data/test/files/only_one_sheet.xlsx +0 -0
  131. data/test/files/only_one_sheet.xml +0 -67
  132. data/test/files/paragraph.ods +0 -0
  133. data/test/files/paragraph.xls +0 -0
  134. data/test/files/paragraph.xlsx +0 -0
  135. data/test/files/paragraph.xml +0 -127
  136. data/test/files/prova.xls +0 -0
  137. data/test/files/ric.ods +0 -0
  138. data/test/files/simple_spreadsheet.ods +0 -0
  139. data/test/files/simple_spreadsheet.xls +0 -0
  140. data/test/files/simple_spreadsheet.xlsx +0 -0
  141. data/test/files/simple_spreadsheet.xml +0 -225
  142. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  143. data/test/files/simple_spreadsheet_from_italo.xls +0 -0
  144. data/test/files/simple_spreadsheet_from_italo.xml +0 -242
  145. data/test/files/so_datetime.csv +0 -7
  146. data/test/files/style.ods +0 -0
  147. data/test/files/style.xls +0 -0
  148. data/test/files/style.xlsx +0 -0
  149. data/test/files/style.xml +0 -154
  150. data/test/files/time-test.csv +0 -2
  151. data/test/files/time-test.ods +0 -0
  152. data/test/files/time-test.xls +0 -0
  153. data/test/files/time-test.xlsx +0 -0
  154. data/test/files/time-test.xml +0 -131
  155. data/test/files/type_excel.ods +0 -0
  156. data/test/files/type_excel.xlsx +0 -0
  157. data/test/files/type_excelx.ods +0 -0
  158. data/test/files/type_excelx.xls +0 -0
  159. data/test/files/type_openoffice.xls +0 -0
  160. data/test/files/type_openoffice.xlsx +0 -0
  161. data/test/files/whitespace.ods +0 -0
  162. data/test/files/whitespace.xls +0 -0
  163. data/test/files/whitespace.xlsx +0 -0
  164. data/test/files/whitespace.xml +0 -184
  165. data/test/rm_sub_test.rb +0 -12
  166. data/test/rm_test.rb +0 -7
  167. data/website/index.html +0 -385
  168. data/website/index.txt +0 -423
  169. data/website/javascripts/rounded_corners_lite.inc.js +0 -285
  170. data/website/stylesheets/screen.css +0 -130
  171. data/website/template.rhtml +0 -48
@@ -1,113 +1,120 @@
1
- require 'csv'
2
- require 'time'
3
-
4
- # The CSV class can read csv files (must be separated with commas) which then
5
- # can be handled like spreadsheets. This means you can access cells like A5
6
- # within these files.
7
- # The CSV class provides only string objects. If you want conversions to other
8
- # types you have to do it yourself.
9
- #
10
- # You can pass options to the underlying CSV parse operation, via the
11
- # :csv_options option.
12
- #
13
-
14
- class Roo::CSV < Roo::Base
15
- def initialize(filename, options = {})
16
- super
17
- end
18
-
19
- attr_reader :filename
20
-
21
- # Returns an array with the names of the sheets. In CSV class there is only
22
- # one dummy sheet, because a csv file cannot have more than one sheet.
23
- def sheets
24
- ['default']
25
- end
26
-
27
- def cell(row, col, sheet=nil)
28
- sheet ||= @default_sheet
29
- read_cells(sheet)
30
- @cell[normalize(row,col)]
31
- end
32
-
33
- def celltype(row, col, sheet=nil)
34
- sheet ||= @default_sheet
35
- read_cells(sheet)
36
- @cell_type[normalize(row,col)]
37
- end
38
-
39
- def cell_postprocessing(row,col,value)
40
- value
41
- end
42
-
43
- def csv_options
44
- @options[:csv_options] || {}
45
- end
46
-
47
- private
48
-
49
- TYPE_MAP = {
50
- String => :string,
51
- Float => :float,
52
- Date => :date,
53
- DateTime => :datetime,
54
- }
55
-
56
- def celltype_class(value)
57
- TYPE_MAP[value.class]
58
- end
59
-
60
- def each_row(options, &block)
61
- if uri?(filename)
62
- make_tmpdir do |tmpdir|
63
- tmp_filename = download_uri(filename, tmpdir)
64
- CSV.foreach(tmp_filename, options, &block)
65
- end
66
- else
67
- CSV.foreach(filename, options, &block)
68
- end
69
- end
70
-
71
- def read_cells(sheet=nil)
72
- sheet ||= @default_sheet
73
- return if @cells_read[sheet]
74
- @first_row[sheet] = 1
75
- @last_row[sheet] = 0
76
- @first_column[sheet] = 1
77
- @last_column[sheet] = 1
78
- rownum = 1
79
- each_row csv_options do |row|
80
- row.each_with_index do |elem,i|
81
- @cell[[rownum,i+1]] = cell_postprocessing rownum,i+1, elem
82
- @cell_type[[rownum,i+1]] = celltype_class @cell[[rownum,i+1]]
83
- if i+1 > @last_column[sheet]
84
- @last_column[sheet] += 1
85
- end
86
- end
87
- rownum += 1
88
- @last_row[sheet] += 1
89
- end
90
- @cells_read[sheet] = true
91
- #-- adjust @first_row if neccessary
92
- while !row(@first_row[sheet]).any? and @first_row[sheet] < @last_row[sheet]
93
- @first_row[sheet] += 1
94
- end
95
- #-- adjust @last_row if neccessary
96
- while !row(@last_row[sheet]).any? and @last_row[sheet] and
97
- @last_row[sheet] > @first_row[sheet]
98
- @last_row[sheet] -= 1
99
- end
100
- #-- adjust @first_column if neccessary
101
- while !column(@first_column[sheet]).any? and
102
- @first_column[sheet] and
103
- @first_column[sheet] < @last_column[sheet]
104
- @first_column[sheet] += 1
105
- end
106
- #-- adjust @last_column if neccessary
107
- while !column(@last_column[sheet]).any? and
108
- @last_column[sheet] and
109
- @last_column[sheet] > @first_column[sheet]
110
- @last_column[sheet] -= 1
111
- end
112
- end
113
- end
1
+ require 'csv'
2
+ require 'time'
3
+
4
+ # The CSV class can read csv files (must be separated with commas) which then
5
+ # can be handled like spreadsheets. This means you can access cells like A5
6
+ # within these files.
7
+ # The CSV class provides only string objects. If you want conversions to other
8
+ # types you have to do it yourself.
9
+ #
10
+ # You can pass options to the underlying CSV parse operation, via the
11
+ # :csv_options option.
12
+ #
13
+
14
+ class Roo::CSV < Roo::Base
15
+
16
+ attr_reader :filename
17
+
18
+ # Returns an array with the names of the sheets. In CSV class there is only
19
+ # one dummy sheet, because a csv file cannot have more than one sheet.
20
+ def sheets
21
+ ['default']
22
+ end
23
+
24
+ def cell(row, col, sheet=nil)
25
+ sheet ||= default_sheet
26
+ read_cells(sheet)
27
+ @cell[normalize(row,col)]
28
+ end
29
+
30
+ def celltype(row, col, sheet=nil)
31
+ sheet ||= default_sheet
32
+ read_cells(sheet)
33
+ @cell_type[normalize(row,col)]
34
+ end
35
+
36
+ def cell_postprocessing(row,col,value)
37
+ value
38
+ end
39
+
40
+ def csv_options
41
+ @options[:csv_options] || {}
42
+ end
43
+
44
+ private
45
+
46
+ TYPE_MAP = {
47
+ String => :string,
48
+ Float => :float,
49
+ Date => :date,
50
+ DateTime => :datetime,
51
+ }
52
+
53
+ def celltype_class(value)
54
+ TYPE_MAP[value.class]
55
+ end
56
+
57
+ def each_row(options, &block)
58
+ if uri?(filename)
59
+ make_tmpdir do |tmpdir|
60
+ tmp_filename = download_uri(filename, tmpdir)
61
+ CSV.foreach(tmp_filename, options, &block)
62
+ end
63
+ else
64
+ CSV.foreach(filename, options, &block)
65
+ end
66
+ end
67
+
68
+ def read_cells(sheet = default_sheet)
69
+ sheet ||= default_sheet
70
+ return if @cells_read[sheet]
71
+ @first_row[sheet] = 1
72
+ @last_row[sheet] = 0
73
+ @first_column[sheet] = 1
74
+ @last_column[sheet] = 1
75
+ rownum = 1
76
+ each_row csv_options do |row|
77
+ row.each_with_index do |elem,i|
78
+ @cell[[rownum,i+1]] = cell_postprocessing rownum,i+1, elem
79
+ @cell_type[[rownum,i+1]] = celltype_class @cell[[rownum,i+1]]
80
+ if i+1 > @last_column[sheet]
81
+ @last_column[sheet] += 1
82
+ end
83
+ end
84
+ rownum += 1
85
+ @last_row[sheet] += 1
86
+ end
87
+ @cells_read[sheet] = true
88
+ #-- adjust @first_row if neccessary
89
+ while !row(@first_row[sheet]).any? and @first_row[sheet] < @last_row[sheet]
90
+ @first_row[sheet] += 1
91
+ end
92
+ #-- adjust @last_row if neccessary
93
+ while !row(@last_row[sheet]).any? and @last_row[sheet] and
94
+ @last_row[sheet] > @first_row[sheet]
95
+ @last_row[sheet] -= 1
96
+ end
97
+ #-- adjust @first_column if neccessary
98
+ while !column(@first_column[sheet]).any? and
99
+ @first_column[sheet] and
100
+ @first_column[sheet] < @last_column[sheet]
101
+ @first_column[sheet] += 1
102
+ end
103
+ #-- adjust @last_column if neccessary
104
+ while !column(@last_column[sheet]).any? and
105
+ @last_column[sheet] and
106
+ @last_column[sheet] > @first_column[sheet]
107
+ @last_column[sheet] -= 1
108
+ end
109
+ end
110
+
111
+ def clean_sheet(sheet)
112
+ read_cells(sheet)
113
+
114
+ @cell.each_pair do |coord, value|
115
+ @cell[coord] = sanitize_value(value) if value.is_a?(::String)
116
+ end
117
+
118
+ @cleaned[sheet] = true
119
+ end
120
+ end
@@ -1,8 +1,18 @@
1
1
  require 'date'
2
2
  require 'nokogiri'
3
- require 'spreadsheet'
3
+ require 'roo/link'
4
+ require 'roo/utils'
5
+ require 'zip/filesystem'
4
6
 
5
7
  class Roo::Excelx < Roo::Base
8
+ autoload :Workbook, 'roo/excelx/workbook'
9
+ autoload :SharedStrings, 'roo/excelx/shared_strings'
10
+ autoload :Styles, 'roo/excelx/styles'
11
+
12
+ autoload :Relationships, 'roo/excelx/relationships'
13
+ autoload :Comments, 'roo/excelx/comments'
14
+ autoload :SheetDoc, 'roo/excelx/sheet_doc'
15
+
6
16
  module Format
7
17
  EXCEPTIONAL_FORMATS = {
8
18
  'h:mm am/pm' => :date,
@@ -46,7 +56,7 @@ class Roo::Excelx < Roo::Base
46
56
  type
47
57
  elsif format.include?('#')
48
58
  :float
49
- elsif format.include?('d') || format.include?('y')
59
+ elsif !format.match(/d+(?![\]])/).nil? || format.include?('y')
50
60
  if format.include?('h') || format.include?('s')
51
61
  :datetime
52
62
  else
@@ -64,140 +74,314 @@ class Roo::Excelx < Roo::Base
64
74
  module_function :to_type
65
75
  end
66
76
 
67
- # initialization and opening of a spreadsheet file
68
- # values for packed: :zip
69
- def initialize(filename, options = {}, deprecated_file_warning = :error)
70
- if Hash === options
71
- packed = options[:packed]
72
- file_warning = options[:file_warning] || :error
73
- else
74
- warn 'Supplying `packed` or `file_warning` as separate arguments to `Roo::Excelx.new` is deprecated. Use an options hash instead.'
75
- packed = options
76
- file_warning = deprecated_file_warning
77
+ class Cell
78
+ attr_reader :type, :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
79
+ attr_writer :value
80
+
81
+ def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate)
82
+ @type = type
83
+ @formula = formula
84
+ @base_date = base_date if [:date, :datetime].include?(@type)
85
+ @excelx_type = excelx_type
86
+ @excelx_value = excelx_value
87
+ @style = style
88
+ @value = type_cast_value(value)
89
+ @value = Roo::Link.new(hyperlink, @value.to_s) if hyperlink
90
+ @coordinate = coordinate
77
91
  end
78
92
 
79
- file_type_check(filename,'.xlsx','an Excel-xlsx', file_warning, packed)
80
- make_tmpdir do |tmpdir|
81
- filename = download_uri(filename, tmpdir) if uri?(filename)
82
- filename = unzip(filename, tmpdir) if packed == :zip
83
- @filename = filename
84
- unless File.file?(@filename)
85
- raise IOError, "file #{@filename} does not exist"
93
+ def type
94
+ if @formula
95
+ :formula
96
+ elsif @value.is_a?(Roo::Link)
97
+ :link
98
+ else
99
+ @type
100
+ end
101
+ end
102
+
103
+ class Coordinate
104
+ attr_accessor :row, :column
105
+
106
+ def initialize(row, column)
107
+ @row, @column = row, column
86
108
  end
87
- @comments_files = Array.new
88
- @rels_files = Array.new
89
- extract_content(tmpdir, @filename)
90
- @workbook_doc = load_xml(File.join(tmpdir, "roo_workbook.xml"))
91
- @shared_table = []
92
- if File.exist?(File.join(tmpdir, 'roo_sharedStrings.xml'))
93
- @sharedstring_doc = load_xml(File.join(tmpdir, 'roo_sharedStrings.xml'))
94
- read_shared_strings(@sharedstring_doc)
109
+ end
110
+
111
+ private
112
+
113
+ def type_cast_value(value)
114
+ case @type
115
+ when :float, :percentage
116
+ value.to_f
117
+ when :date
118
+ yyyy,mm,dd = (@base_date+value.to_i).strftime("%Y-%m-%d").split('-')
119
+ Date.new(yyyy.to_i,mm.to_i,dd.to_i)
120
+ when :datetime
121
+ create_datetime_from((@base_date+value.to_f.round(6)).strftime("%Y-%m-%d %H:%M:%S.%N"))
122
+ when :time
123
+ value.to_f*(24*60*60)
124
+ when :string
125
+ value
126
+ else
127
+ value
128
+ end
129
+ end
130
+
131
+ def create_datetime_from(datetime_string)
132
+ date_part,time_part = round_time_from(datetime_string).split(' ')
133
+ yyyy,mm,dd = date_part.split('-')
134
+ hh,mi,ss = time_part.split(':')
135
+ DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i)
136
+ end
137
+
138
+ def round_time_from(datetime_string)
139
+ date_part,time_part = datetime_string.split(' ')
140
+ yyyy,mm,dd = date_part.split('-')
141
+ hh,mi,ss = time_part.split(':')
142
+ Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0).strftime("%Y-%m-%d %H:%M:%S")
143
+ end
144
+ end
145
+
146
+ class Sheet
147
+ def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook, options = {})
148
+ @name = name
149
+ @rels = Relationships.new(rels_path)
150
+ @comments = Comments.new(comments_path)
151
+ @styles = styles
152
+ @sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook, options)
153
+ end
154
+
155
+ def cells
156
+ @cells ||= @sheet.cells(@rels)
157
+ end
158
+
159
+ def present_cells
160
+ @present_cells ||= cells.select {|key, cell| cell && cell.value }
161
+ end
162
+
163
+ # Yield each row as array of Excelx::Cell objects
164
+ # accepts options max_rows (int) (offset by 1 for header)
165
+ # and pad_cells (boolean)
166
+ def each_row(options = {}, &block)
167
+ row_count = 0
168
+ @sheet.each_row_streaming do |row|
169
+ break if options[:max_rows] && row_count == options[:max_rows] + 1
170
+ block.call(cells_for_row_element(row, options)) if block_given?
171
+ row_count += 1
172
+ end
173
+ end
174
+
175
+ def row(row_number)
176
+ first_column.upto(last_column).map do |col|
177
+ cells[[row_number,col]]
178
+ end.map {|cell| cell && cell.value }
179
+ end
180
+
181
+ def column(col_number)
182
+ first_row.upto(last_row).map do |row|
183
+ cells[[row,col_number]]
184
+ end.map {|cell| cell && cell.value }
185
+ end
186
+
187
+ # returns the number of the first non-empty row
188
+ def first_row
189
+ @first_row ||= present_cells.keys.map {|row, _| row }.min
190
+ end
191
+
192
+ def last_row
193
+ @last_row ||= present_cells.keys.map {|row, _| row }.max
194
+ end
195
+
196
+ # returns the number of the first non-empty column
197
+ def first_column
198
+ @first_column ||= present_cells.keys.map {|_, col| col }.min
199
+ end
200
+
201
+ # returns the number of the last non-empty column
202
+ def last_column
203
+ @last_column ||= present_cells.keys.map {|_, col| col }.max
204
+ end
205
+
206
+ def excelx_format(key)
207
+ cell = cells[key]
208
+ @styles.style_format(cell.style).to_s if cell
209
+ end
210
+
211
+ def hyperlinks
212
+ @hyperlinks ||= @sheet.hyperlinks(@rels)
213
+ end
214
+
215
+ def comments
216
+ @comments.comments
217
+ end
218
+
219
+ def dimensions
220
+ @sheet.dimensions
221
+ end
222
+
223
+ private
224
+
225
+ # Take an xml row and return an array of Excelx::Cell objects
226
+ # optionally pad array to header width(assumed 1st row).
227
+ # takes option pad_cells (boolean) defaults false
228
+ def cells_for_row_element(row_element, options = {})
229
+ return [] unless row_element
230
+ cell_col = 0
231
+ cells = []
232
+ @sheet.each_cell(row_element) do |cell|
233
+ cells.concat(pad_cells(cell, cell_col)) if options[:pad_cells]
234
+ cells << cell
235
+ cell_col = cell.coordinate.column
95
236
  end
96
- @styles_table = []
97
- @style_definitions = Array.new # TODO: ??? { |h,k| h[k] = {} }
98
- if File.exist?(File.join(tmpdir, 'roo_styles.xml'))
99
- @styles_doc = load_xml(File.join(tmpdir, 'roo_styles.xml'))
100
- read_styles(@styles_doc)
237
+ cells
238
+ end
239
+
240
+ def pad_cells(cell, last_column)
241
+ pad = []
242
+ (cell.coordinate.column - 1 - last_column).times { pad << nil }
243
+ pad
244
+ end
245
+ end
246
+
247
+ ExceedsMaxError = Class.new(StandardError)
248
+
249
+ # initialization and opening of a spreadsheet file
250
+ # values for packed: :zip
251
+ # optional cell_max (int) parameter for early aborting attempts to parse
252
+ # enormous documents.
253
+ def initialize(filename, options = {})
254
+ packed = options[:packed]
255
+ file_warning = options.fetch(:file_warning, :error)
256
+ cell_max = options.delete(:cell_max)
257
+ sheet_options = {}
258
+ sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
259
+
260
+ file_type_check(filename,'.xlsx','an Excel-xlsx', file_warning, packed)
261
+
262
+ @tmpdir = make_tmpdir(filename.split('/').last, options[:tmpdir_root])
263
+ @filename = local_filename(filename, @tmpdir, packed)
264
+ @comments_files = []
265
+ @rels_files = []
266
+ process_zipfile(@tmpdir, @filename)
267
+
268
+ @sheet_names = workbook.sheets.map do |sheet|
269
+ unless options[:only_visible_sheets] && sheet['state'] == 'hidden'
270
+ sheet['name']
101
271
  end
102
- @sheet_doc = load_xmls(@sheet_files)
103
- @comments_doc = load_xmls(@comments_files)
104
- @rels_doc = load_xmls(@rels_files)
105
- end
106
- super(filename, options)
107
- @formula = Hash.new
108
- @excelx_type = Hash.new
109
- @excelx_value = Hash.new
110
- @s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt
111
- @comment = Hash.new
112
- @comments_read = Hash.new
113
- @hyperlink = Hash.new
114
- @hyperlinks_read = Hash.new
115
- end
116
-
117
- def method_missing(m,*args)
118
- # is method name a label name
119
- read_labels
120
- if @label.has_key?(m.to_s)
121
- sheet ||= @default_sheet
122
- read_cells(sheet)
123
- row,col = label(m.to_s)
124
- cell(row,col)
272
+ end.compact
273
+ @sheets = []
274
+ @sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n|
275
+ @sheets[n] = Sheet.new(sheet_name, @rels_files[n], @sheet_files[n], @comments_files[n], styles, shared_strings, workbook, sheet_options)
276
+ [sheet_name, @sheets[n]]
277
+ end]
278
+
279
+ if cell_max
280
+ cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions)
281
+ raise ExceedsMaxError.new("Excel file exceeds cell maximum: #{cell_count} > #{cell_max}") if cell_count > cell_max
282
+ end
283
+
284
+ super
285
+ end
286
+
287
+ def method_missing(method,*args)
288
+ if label = workbook.defined_names[method.to_s]
289
+ safe_send(sheet_for(label.sheet).cells[label.key], :value)
125
290
  else
126
291
  # call super for methods like #a1
127
292
  super
128
293
  end
129
294
  end
130
295
 
296
+ def sheets
297
+ @sheet_names
298
+ end
299
+
300
+ def sheet_for(sheet)
301
+ sheet ||= default_sheet
302
+ validate_sheet!(sheet)
303
+ @sheets_by_name[sheet]
304
+ end
305
+
131
306
  # Returns the content of a spreadsheet-cell.
132
307
  # (1,1) is the upper left corner.
133
308
  # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
134
309
  # cell at the first line and first row.
135
310
  def cell(row, col, sheet=nil)
136
- sheet ||= @default_sheet
137
- read_cells(sheet)
138
- row,col = normalize(row,col)
139
- if celltype(row,col,sheet) == :date
140
- yyyy,mm,dd = @cell[sheet][[row,col]].split('-')
141
- return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
142
- elsif celltype(row,col,sheet) == :datetime
143
- date_part,time_part = @cell[sheet][[row,col]].split(' ')
144
- yyyy,mm,dd = date_part.split('-')
145
- hh,mi,ss = time_part.split(':')
146
- return DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i)
311
+ key = normalize(row,col)
312
+ safe_send(sheet_for(sheet).cells[key], :value)
313
+ end
314
+
315
+ def row(rownumber,sheet=nil)
316
+ sheet_for(sheet).row(rownumber)
317
+ end
318
+
319
+ # returns all values in this column as an array
320
+ # column numbers are 1,2,3,... like in the spreadsheet
321
+ def column(column_number,sheet=nil)
322
+ if column_number.is_a?(::String)
323
+ column_number = ::Roo::Utils.letter_to_number(column_number)
147
324
  end
148
- @cell[sheet][[row,col]]
325
+ sheet_for(sheet).column(column_number)
149
326
  end
150
327
 
328
+ # returns the number of the first non-empty row
329
+ def first_row(sheet=nil)
330
+ sheet_for(sheet).first_row
331
+ end
332
+
333
+ # returns the number of the last non-empty row
334
+ def last_row(sheet=nil)
335
+ sheet_for(sheet).last_row
336
+ end
337
+
338
+ # returns the number of the first non-empty column
339
+ def first_column(sheet=nil)
340
+ sheet_for(sheet).first_column
341
+ end
342
+
343
+ # returns the number of the last non-empty column
344
+ def last_column(sheet=nil)
345
+ sheet_for(sheet).last_column
346
+ end
347
+
348
+ # set a cell to a certain value
349
+ # (this will not be saved back to the spreadsheet file!)
350
+ def set(row,col,value, sheet = nil) #:nodoc:
351
+ key = normalize(row,col)
352
+ cell_type = cell_type_by_value(value)
353
+ sheet_for(sheet).cells[key] = Cell.new(value, cell_type, nil, cell_type, value, nil, nil, nil, Cell::Coordinate.new(row, col))
354
+ end
355
+
356
+
151
357
  # Returns the formula at (row,col).
152
358
  # Returns nil if there is no formula.
153
359
  # The method #formula? checks if there is a formula.
154
360
  def formula(row,col,sheet=nil)
155
- sheet ||= @default_sheet
156
- read_cells(sheet)
157
- row,col = normalize(row,col)
158
- @formula[sheet][[row,col]] && @formula[sheet][[row,col]]
361
+ key = normalize(row,col)
362
+ safe_send(sheet_for(sheet).cells[key], :formula)
159
363
  end
160
- alias_method :formula?, :formula
161
364
 
162
- # returns each formula in the selected sheet as an array of elements
163
- # [row, col, formula]
164
- def formulas(sheet=nil)
165
- sheet ||= @default_sheet
166
- read_cells(sheet)
167
- if @formula[sheet]
168
- @formula[sheet].each.collect do |elem|
169
- [elem[0][0], elem[0][1], elem[1]]
170
- end
171
- else
172
- []
173
- end
365
+ # Predicate methods really should return a boolean
366
+ # value. Hopefully no one was relying on the fact that this
367
+ # previously returned either nil/formula
368
+ def formula?(*args)
369
+ !!formula(*args)
174
370
  end
175
371
 
176
- class Font
177
- attr_accessor :bold, :italic, :underline
178
-
179
- def bold?
180
- @bold == true
181
- end
182
-
183
- def italic?
184
- @italic == true
185
- end
186
-
187
- def underline?
188
- @underline == true
372
+ # returns each formula in the selected sheet as an array of tuples in following format
373
+ # [[row, col, formula], [row, col, formula],...]
374
+ def formulas(sheet=nil)
375
+ sheet_for(sheet).cells.select {|_, cell| cell.formula }.map do |(x, y), cell|
376
+ [x, y, cell.formula]
189
377
  end
190
378
  end
191
379
 
192
380
  # Given a cell, return the cell's style
193
381
  def font(row, col, sheet=nil)
194
- sheet ||= @default_sheet
195
- read_cells(sheet)
196
- row,col = normalize(row,col)
197
- s_attribute = @s_attribute[sheet][[row,col]]
198
- s_attribute ||= 0
199
- s_attribute = s_attribute.to_i
200
- @style_definitions[s_attribute]
382
+ key = normalize(row,col)
383
+ definition_index = safe_send(sheet_for(sheet).cells[key], :style)
384
+ styles.definitions[definition_index] if definition_index
201
385
  end
202
386
 
203
387
  # returns the type of a cell:
@@ -209,14 +393,8 @@ class Roo::Excelx < Roo::Base
209
393
  # * :time
210
394
  # * :datetime
211
395
  def celltype(row,col,sheet=nil)
212
- sheet ||= @default_sheet
213
- read_cells(sheet)
214
- row,col = normalize(row,col)
215
- if @formula[sheet][[row,col]]
216
- return :formula
217
- else
218
- @cell_type[sheet][[row,col]]
219
- end
396
+ key = normalize(row, col)
397
+ safe_send(sheet_for(sheet).cells[key], :type)
220
398
  end
221
399
 
222
400
  # returns the internal type of an excel cell
@@ -224,451 +402,241 @@ class Roo::Excelx < Roo::Base
224
402
  # * :string
225
403
  # Note: this is only available within the Excelx class
226
404
  def excelx_type(row,col,sheet=nil)
227
- sheet ||= @default_sheet
228
- read_cells(sheet)
229
- row,col = normalize(row,col)
230
- return @excelx_type[sheet][[row,col]]
405
+ key = normalize(row,col)
406
+ safe_send(sheet_for(sheet).cells[key], :excelx_type)
231
407
  end
232
408
 
233
409
  # returns the internal value of an excelx cell
234
410
  # Note: this is only available within the Excelx class
235
411
  def excelx_value(row,col,sheet=nil)
236
- sheet ||= @default_sheet
237
- read_cells(sheet)
238
- row,col = normalize(row,col)
239
- return @excelx_value[sheet][[row,col]]
412
+ key = normalize(row,col)
413
+ safe_send(sheet_for(sheet).cells[key], :excelx_value)
240
414
  end
241
415
 
242
416
  # returns the internal format of an excel cell
243
417
  def excelx_format(row,col,sheet=nil)
244
- sheet ||= @default_sheet
245
- read_cells(sheet)
246
- row,col = normalize(row,col)
247
- s = @s_attribute[sheet][[row,col]]
248
- attribute2format(s).to_s
418
+ key = normalize(row,col)
419
+ sheet_for(sheet).excelx_format(key)
249
420
  end
250
421
 
251
- # returns an array of sheet names in the spreadsheet
252
- def sheets
253
- @workbook_doc.xpath("//xmlns:sheet").map do |sheet|
254
- sheet['name']
255
- end
422
+ def empty?(row,col,sheet=nil)
423
+ sheet = sheet_for(sheet)
424
+ key = normalize(row,col)
425
+ cell = sheet.cells[key]
426
+ !cell || !cell.value || (cell.type == :string && cell.value.empty?) \
427
+ || (row < sheet.first_row || row > sheet.last_row || col < sheet.first_column || col > sheet.last_column)
256
428
  end
257
429
 
258
430
  # shows the internal representation of all cells
259
431
  # for debugging purposes
260
432
  def to_s(sheet=nil)
261
- sheet ||= @default_sheet
262
- read_cells(sheet)
263
- @cell[sheet].inspect
433
+ sheet_for(sheet).cells.inspect
264
434
  end
265
435
 
266
436
  # returns the row,col values of the labelled cell
267
437
  # (nil,nil) if label is not defined
268
- def label(labelname)
269
- read_labels
270
- if @label.empty? || !@label.has_key?(labelname)
271
- return nil,nil,nil
438
+ def label(name)
439
+ labels = workbook.defined_names
440
+ if labels.empty? || !labels.key?(name)
441
+ [nil,nil,nil]
272
442
  else
273
- return @label[labelname][1].to_i,
274
- Roo::Base.letter_to_number(@label[labelname][2]),
275
- @label[labelname][0]
443
+ [labels[name].row,
444
+ labels[name].col,
445
+ labels[name].sheet]
276
446
  end
277
447
  end
278
448
 
279
449
  # Returns an array with all labels. Each element is an array with
280
450
  # [labelname, [row,col,sheetname]]
281
451
  def labels
282
- # sheet ||= @default_sheet
283
- # read_cells(sheet)
284
- read_labels
285
- @label.map do |label|
286
- [ label[0], # name
287
- [ label[1][1].to_i, # row
288
- Roo::Base.letter_to_number(label[1][2]), # column
289
- label[1][0], # sheet
452
+ @labels ||= workbook.defined_names.map do |name, label|
453
+ [ name,
454
+ [ label.row,
455
+ label.col,
456
+ label.sheet,
290
457
  ] ]
291
458
  end
292
459
  end
293
460
 
294
461
  def hyperlink?(row,col,sheet=nil)
295
- hyperlink(row, col, sheet) != nil
462
+ !!hyperlink(row, col, sheet)
296
463
  end
297
464
 
298
465
  # returns the hyperlink at (row/col)
299
466
  # nil if there is no hyperlink
300
467
  def hyperlink(row,col,sheet=nil)
301
- sheet ||= @default_sheet
302
- read_hyperlinks(sheet) unless @hyperlinks_read[sheet]
303
- row,col = normalize(row,col)
304
- return nil unless @hyperlink[sheet]
305
- @hyperlink[sheet][[row,col]]
468
+ key = normalize(row,col)
469
+ sheet_for(sheet).hyperlinks[key]
306
470
  end
307
471
 
308
472
  # returns the comment at (row/col)
309
473
  # nil if there is no comment
310
474
  def comment(row,col,sheet=nil)
311
- sheet ||= @default_sheet
312
- #read_cells(sheet)
313
- read_comments(sheet) unless @comments_read[sheet]
314
- row,col = normalize(row,col)
315
- return nil unless @comment[sheet]
316
- @comment[sheet][[row,col]]
475
+ key = normalize(row,col)
476
+ sheet_for(sheet).comments[key]
317
477
  end
318
478
 
319
479
  # true, if there is a comment
320
480
  def comment?(row,col,sheet=nil)
321
- sheet ||= @default_sheet
322
- # read_cells(sheet)
323
- read_comments(sheet) unless @comments_read[sheet]
324
- row,col = normalize(row,col)
325
- comment(row,col) != nil
481
+ !!comment(row,col,sheet)
326
482
  end
327
483
 
328
- # returns each comment in the selected sheet as an array of elements
329
- # [row, col, comment]
330
484
  def comments(sheet=nil)
331
- sheet ||= @default_sheet
332
- read_comments(sheet) unless @comments_read[sheet]
333
- if @comment[sheet]
334
- @comment[sheet].each.collect do |elem|
335
- [elem[0][0],elem[0][1],elem[1]]
336
- end
337
- else
338
- []
485
+ sheet_for(sheet).comments.map do |(x, y), comment|
486
+ [x, y, comment]
339
487
  end
340
488
  end
341
489
 
342
- private
490
+ # Yield an array of Excelx::Cell
491
+ # Takes options for sheet, pad_cells, and max_rows
492
+ def each_row_streaming(options={})
493
+ sheet_for(options.delete(:sheet)).each_row(options) { |row| yield row }
494
+ end
343
495
 
344
- def load_xmls(paths)
345
- paths.compact.map do |item|
346
- load_xml(item)
347
- end
348
- end
349
-
350
- # helper function to set the internal representation of cells
351
- def set_cell_values(sheet,x,y,i,v,value_type,formula,
352
- excelx_type=nil,
353
- excelx_value=nil,
354
- s_attribute=nil)
355
- key = [y,x+i]
356
- @cell_type[sheet] ||= {}
357
- @cell_type[sheet][key] = value_type
358
- @formula[sheet] ||= {}
359
- @formula[sheet][key] = formula if formula
360
- @cell[sheet] ||= {}
361
- @cell[sheet][key] =
362
- case @cell_type[sheet][key]
363
- when :float
364
- v.to_f
365
- when :string
366
- v
367
- when :date
368
- (base_date+v.to_i).strftime("%Y-%m-%d")
369
- when :datetime
370
- (base_date+v.to_f).strftime("%Y-%m-%d %H:%M:%S")
371
- when :percentage
372
- v.to_f
373
- when :time
374
- v.to_f*(24*60*60)
375
- else
376
- v
377
- end
496
+ private
378
497
 
379
- @cell[sheet][key] = Spreadsheet::Link.new(@hyperlink[sheet][key], @cell[sheet][key].to_s) if hyperlink?(y,x+i)
380
- @excelx_type[sheet] ||= {}
381
- @excelx_type[sheet][key] = excelx_type
382
- @excelx_value[sheet] ||= {}
383
- @excelx_value[sheet][key] = excelx_value
384
- @s_attribute[sheet] ||= {}
385
- @s_attribute[sheet][key] = s_attribute
386
- end
498
+ def clean_sheet(sheet)
499
+ @sheets_by_name[sheet].cells.each_pair do |coord, value|
500
+ next unless value.value.is_a?(::String)
387
501
 
388
- # read all cells in the selected sheet
389
- def read_cells(sheet=nil)
390
- sheet ||= @default_sheet
391
- validate_sheet!(sheet)
392
- return if @cells_read[sheet]
393
-
394
- @sheet_doc[sheets.index(sheet)].xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row/xmlns:c").each do |c|
395
- s_attribute = c['s'].to_i # should be here
396
- # c: <c r="A5" s="2">
397
- # <v>22606</v>
398
- # </c>, format: , tmp_type: float
399
- value_type =
400
- case c['t']
401
- when 's'
402
- :shared
403
- when 'b'
404
- :boolean
405
- # 2011-02-25 BEGIN
406
- when 'str'
407
- :string
408
- # 2011-02-25 END
409
- # 2011-09-15 BEGIN
410
- when 'inlineStr'
411
- :inlinestr
412
- # 2011-09-15 END
413
- else
414
- format = attribute2format(s_attribute)
415
- Format.to_type(format)
416
- end
417
- formula = nil
418
- c.children.each do |cell|
419
- case cell.name
420
- when 'is'
421
- cell.children.each do |is|
422
- if is.name == 't'
423
- inlinestr_content = is.content
424
- value_type = :string
425
- v = inlinestr_content
426
- excelx_type = :string
427
- y, x = Roo::Base.split_coordinate(c['r'])
428
- excelx_value = inlinestr_content #cell.content
429
- set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute)
430
- end
431
- end
432
- when 'f'
433
- formula = cell.content
434
- when 'v'
435
- if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0
436
- value_type =
437
- if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
438
- :datetime
439
- else
440
- :date
441
- end
442
- end
443
- excelx_type = [:numeric_or_formula,format.to_s]
444
- excelx_value = cell.content
445
- v =
446
- case value_type
447
- when :shared
448
- value_type = :string
449
- excelx_type = :string
450
- @shared_table[cell.content.to_i]
451
- when :boolean
452
- (cell.content.to_i == 1 ? 'TRUE' : 'FALSE')
453
- when :date
454
- cell.content
455
- when :time
456
- cell.content
457
- when :datetime
458
- cell.content
459
- when :formula
460
- cell.content.to_f #TODO: !!!!
461
- when :string
462
- excelx_type = :string
463
- cell.content
464
- else
465
- value_type = :float
466
- cell.content
467
- end
468
- y, x = Roo::Base.split_coordinate(c['r'])
469
- set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute)
470
- end
471
- end
502
+ @sheets_by_name[sheet].cells[coord].value = sanitize_value(value.value)
472
503
  end
473
- @cells_read[sheet] = true
474
- # begin comments
475
- =begin
476
- Datei xl/comments1.xml
477
- <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
478
- <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
479
- <authors>
480
- <author />
481
- </authors>
482
- <commentList>
483
- <comment ref="B4" authorId="0">
484
- <text>
485
- <r>
486
- <rPr>
487
- <sz val="10" />
488
- <rFont val="Arial" />
489
- <family val="2" />
490
- </rPr>
491
- <t>Kommentar fuer B4</t>
492
- </r>
493
- </text>
494
- </comment>
495
- <comment ref="B5" authorId="0">
496
- <text>
497
- <r>
498
- <rPr>
499
- <sz val="10" />
500
- <rFont val="Arial" />
501
- <family val="2" />
502
- </rPr>
503
- <t>Kommentar fuer B5</t>
504
- </r>
505
- </text>
506
- </comment>
507
- </commentList>
508
- </comments>
509
- =end
510
- =begin
511
- if @comments_doc[self.sheets.index(sheet)]
512
- read_comments(sheet)
513
- end
514
- =end
515
- #end comments
516
- end
517
-
518
- # Reads all comments from a sheet
519
- def read_comments(sheet=nil)
520
- sheet ||= @default_sheet
521
- validate_sheet!(sheet)
522
- n = self.sheets.index(sheet)
523
- return unless @comments_doc[n] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
524
- @comments_doc[n].xpath("//xmlns:comments/xmlns:commentList/xmlns:comment").each do |comment|
525
- ref = comment.attributes['ref'].to_s
526
- row,col = Roo::Base.split_coordinate(ref)
527
- comment.xpath('./xmlns:text/xmlns:r/xmlns:t').each do |text|
528
- @comment[sheet] ||= {}
529
- @comment[sheet][[row,col]] = text.text
530
- end
531
- end
532
- @comments_read[sheet] = true
533
- end
534
504
 
535
- # Reads all hyperlinks from a sheet
536
- def read_hyperlinks(sheet=nil)
537
- sheet ||= @default_sheet
538
- validate_sheet!(sheet)
539
- n = self.sheets.index(sheet)
540
- if rels_doc = @rels_doc[n]
541
- rels = Hash[rels_doc.xpath("/xmlns:Relationships/xmlns:Relationship").map do |r|
542
- [r.attribute('Id').text, r]
543
- end]
544
- @sheet_doc[n].xpath("/xmlns:worksheet/xmlns:hyperlinks/xmlns:hyperlink").each do |h|
545
- if rel_element = rels[h.attribute('id').text]
546
- row,col = Roo::Base.split_coordinate(h.attributes['ref'].to_s)
547
- @hyperlink[sheet] ||= {}
548
- @hyperlink[sheet][[row,col]] = rel_element.attribute('Target').text
549
- end
550
- end
505
+ @cleaned[sheet] = true
506
+ end
507
+
508
+ # Internal: extracts the worksheet_ids from the workbook.xml file. xlsx
509
+ # documents require a workbook.xml file, so if the file is missing
510
+ # it is not a valid xlsx file. In these cases, an ArgumentError is
511
+ # raised.
512
+ #
513
+ # entries - the archive's Zip::Entry objects; the one whose name ends in workbook.xml is extracted.
514
+ # path - A String for Zip::Entry's destination path.
515
+ #
516
+ # Examples
517
+ #
518
+ # extract_worksheet_ids(<Zip::Entry>, 'tmpdir/roo_workbook.xml')
519
+ # # => ["rId1", "rId2", "rId3"]
520
+ #
521
+ # Returns an Array of Strings.
522
+ def extract_worksheet_ids(entries, path)
523
+ wb = entries.find { |e| e.name[/workbook.xml$/] }
524
+ fail ArgumentError 'missing required workbook file' if wb.nil?
525
+
526
+ wb.extract(path)
527
+ workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
528
+ workbook_doc.xpath('//sheet').map{ |s| s.attributes['id'].value }
529
+ end
530
+
531
+ # Internal
532
+ #
533
+ # entries - the archive's Zip::Entry objects; the one whose name ends in workbook.xml.rels is extracted.
534
+ # path - A String for the Zip::Entry's destination path.
535
+ #
536
+ # Examples
537
+ #
538
+ # extract_worksheet_rels(entries, 'tmpdir/roo_workbook.xml.rels')
539
+ # # => {
540
+ # "rId1"=>"worksheets/sheet1.xml",
541
+ # "rId2"=>"worksheets/sheet2.xml",
542
+ # "rId3"=>"worksheets/sheet3.xml"
543
+ # }
544
+ #
545
+ # Returns a Hash.
546
+ def extract_worksheet_rels(entries, path)
547
+ wb_rels = entries.find { |e| e.name[/workbook.xml.rels$/] }
548
+ fail ArgumentError 'missing required workbook file' if wb_rels.nil?
549
+
550
+ wb_rels.extract(path)
551
+ rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
552
+ worksheet_type ='http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'
553
+
554
+ relationships = rels_doc.xpath('//Relationship').select do |relationship|
555
+ relationship.attributes['Type'].value == worksheet_type
551
556
  end
552
- @hyperlinks_read[sheet] = true
553
- end
554
557
 
555
- def read_labels
556
- @label ||= Hash[@workbook_doc.xpath("//xmlns:definedName").map do |defined_name|
557
- # "Sheet1!$C$5"
558
- sheet, coordinates = defined_name.text.split('!$', 2)
559
- col,row = coordinates.split('$')
560
- [defined_name['name'], [sheet,row,col]]
561
- end]
562
- end
563
-
564
- # Extracts all needed files from the zip file
565
- def process_zipfile(tmpdir, zipfilename, zip, path='')
566
- @sheet_files = []
567
- Roo::ZipFile.open(zipfilename) {|zf|
568
- zf.entries.each {|entry|
569
- entry_name = entry.to_s.downcase
570
-
571
- path =
572
- if entry_name.end_with?('workbook.xml')
573
- "#{tmpdir}/roo_workbook.xml"
574
- elsif entry_name.end_with?('sharedstrings.xml')
575
- "#{tmpdir}/roo_sharedStrings.xml"
576
- elsif entry_name.end_with?('styles.xml')
577
- "#{tmpdir}/roo_styles.xml"
578
- elsif entry_name =~ /sheet([0-9]+).xml$/
579
- nr = $1
580
- @sheet_files[nr.to_i-1] = "#{tmpdir}/roo_sheet#{nr}"
581
- elsif entry_name =~ /comments([0-9]+).xml$/
582
- nr = $1
583
- @comments_files[nr.to_i-1] = "#{tmpdir}/roo_comments#{nr}"
584
- elsif entry_name =~ /sheet([0-9]+).xml.rels$/
585
- nr = $1
586
- @rels_files[nr.to_i-1] = "#{tmpdir}/roo_rels#{nr}"
587
- end
588
- if path
589
- extract_file(zip, entry, path)
590
- end
591
- }
592
- }
593
- end
594
-
595
- def extract_file(source_zip, entry, destination_path)
596
- open(destination_path,'wb') {|f|
597
- f << source_zip.read(entry)
598
- }
599
- end
600
-
601
- # extract files from the zip file
602
- def extract_content(tmpdir, zipfilename)
603
- Roo::ZipFile.open(@filename) do |zip|
604
- process_zipfile(tmpdir, zipfilename,zip)
558
+ relationships.inject({}) do |hash, relationship|
559
+ attributes = relationship.attributes
560
+ id = attributes['Id'];
561
+ hash[id.value] = attributes['Target'].value
562
+ hash
605
563
  end
606
564
  end
607
565
 
608
- # read the shared strings xml document
609
- def read_shared_strings(doc)
610
- doc.xpath("/xmlns:sst/xmlns:si").each do |si|
611
- shared_table_entry = ''
612
- si.children.each do |elem|
613
- if elem.name == 'r' and elem.children
614
- elem.children.each do |r_elem|
615
- if r_elem.name == 't'
616
- shared_table_entry << r_elem.content
617
- end
618
- end
619
- end
620
- if elem.name == 't'
621
- shared_table_entry = elem.content
622
- end
623
- end
624
- @shared_table << shared_table_entry
566
+ def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
567
+ sheet_ids.each_with_index do |id, i|
568
+ name = sheets[id]
569
+ entry = entries.find { |entry| entry.name =~ /#{name}$/ }
570
+ path = "#{tmpdir}/roo_sheet#{i + 1}"
571
+ @sheet_files << path
572
+ entry.extract(path)
625
573
  end
626
574
  end
627
575
 
628
- # read the styles elements of an excelx document
629
- def read_styles(doc)
630
- @cellXfs = []
631
-
632
- @numFmts = Hash[doc.xpath("//xmlns:numFmt").map do |numFmt|
633
- [numFmt['numFmtId'], numFmt['formatCode']]
634
- end]
635
- fonts = doc.xpath("//xmlns:fonts/xmlns:font").map do |font_el|
636
- Font.new.tap do |font|
637
- font.bold = !font_el.xpath('./xmlns:b').empty?
638
- font.italic = !font_el.xpath('./xmlns:i').empty?
639
- font.underline = !font_el.xpath('./xmlns:u').empty?
576
+ # Extracts all needed files from the zip file
577
+ def process_zipfile(tmpdir, zipfilename)
578
+ @sheet_files = []
579
+ entries = Zip::File.open(zipfilename).to_a.sort_by(&:name)
580
+
581
+ # NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames
582
+ # are not in order. With Numbers 3.1, the first sheet is always
583
+ # sheet.xml, not sheet1.xml. With Google, the order of the worksheets is
584
+ # independent of a worksheet's filename (i.e. sheet6.xml can be the
585
+ # first worksheet).
586
+ #
587
+ # workbook.xml lists the correct order of worksheets and
588
+ # workbook.xml.rels lists the filenames for those worksheets.
589
+ #
590
+ # workbook.xml:
591
+ # <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/>
592
+ # <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/>
593
+ # workbook.xml.rels:
594
+ # <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
595
+ # <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
596
+ sheet_ids = extract_worksheet_ids(entries, "#{tmpdir}/roo_workbook.xml")
597
+ sheets = extract_worksheet_rels(entries, "#{tmpdir}/roo_workbook.xml.rels")
598
+ extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
599
+
600
+ entries.each do |entry|
601
+ path =
602
+ case entry.name.downcase
603
+ when /sharedstrings.xml$/
604
+ "#{tmpdir}/roo_sharedStrings.xml"
605
+ when /styles.xml$/
606
+ "#{tmpdir}/roo_styles.xml"
607
+ when /comments([0-9]+).xml$/
608
+ # FIXME: Most of the time, the order of the comment files is the same as
609
+ # the sheet order, i.e. sheet1.xml's comments are in comments1.xml.
610
+ # In some situations, this isn't true. The true location of a
611
+ # sheet's comment file is in the sheet1.xml.rels file. SEE
612
+ # ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
613
+ nr = Regexp.last_match[1].to_i
614
+ @comments_files[nr - 1] = "#{tmpdir}/roo_comments#{nr}"
615
+ when /sheet([0-9]+).xml.rels$/
616
+ # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
617
+ # it also stores the location for sharedStrings, comments,
618
+ # drawings, etc.
619
+ nr = Regexp.last_match[1].to_i
620
+ @rels_files[nr - 1] = "#{tmpdir}/roo_rels#{nr}"
640
621
  end
641
- end
642
622
 
643
- doc.xpath("//xmlns:cellXfs").each do |xfs|
644
- xfs.children.each do |xf|
645
- @cellXfs << xf['numFmtId']
646
- @style_definitions << fonts[xf['fontId'].to_i]
647
- end
623
+ entry.extract(path) if path
648
624
  end
649
625
  end
650
626
 
651
- # convert internal excelx attribute to a format
652
- def attribute2format(s)
653
- id = @cellXfs[s.to_i]
654
- @numFmts[id] || Format::STANDARD_FORMATS[id.to_i]
627
+ def styles
628
+ @styles ||= Styles.new(File.join(@tmpdir, 'roo_styles.xml'))
655
629
  end
656
630
 
657
- def base_date
658
- @base_date ||= read_base_date
631
+ def shared_strings
632
+ @shared_strings ||= SharedStrings.new(File.join(@tmpdir, 'roo_sharedStrings.xml'))
659
633
  end
660
634
 
661
- # Default to 1900 (minus one day due to excel quirk) but use 1904 if
662
- # it's set in the Workbook's workbookPr
663
- # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
664
- def read_base_date
665
- base_date = Date.new(1899,12,30)
666
- @workbook_doc.xpath("//xmlns:workbookPr").map do |workbookPr|
667
- if workbookPr["date1904"] && workbookPr["date1904"] =~ /true|1/i
668
- base_date = Date.new(1904,01,01)
669
- end
670
- end
671
- base_date
635
+ def workbook
636
+ @workbook ||= Workbook.new(File.join(@tmpdir, "roo_workbook.xml"))
672
637
  end
673
638
 
674
- end # class
639
+ def safe_send(object, method, *args)
640
+ object.send(method, *args) if object && object.respond_to?(method)
641
+ end
642
+ end