roo 1.13.2 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +7 -0
  3. data/.simplecov +4 -0
  4. data/.travis.yml +13 -0
  5. data/CHANGELOG.md +515 -0
  6. data/Gemfile +16 -10
  7. data/Guardfile +24 -0
  8. data/LICENSE +3 -1
  9. data/README.md +254 -0
  10. data/Rakefile +23 -23
  11. data/examples/roo_soap_client.rb +28 -31
  12. data/examples/roo_soap_server.rb +4 -6
  13. data/examples/write_me.rb +9 -10
  14. data/lib/roo/base.rb +334 -395
  15. data/lib/roo/csv.rb +120 -113
  16. data/lib/roo/excelx/cell.rb +77 -0
  17. data/lib/roo/excelx/comments.rb +22 -0
  18. data/lib/roo/excelx/extractor.rb +22 -0
  19. data/lib/roo/excelx/relationships.rb +25 -0
  20. data/lib/roo/excelx/shared_strings.rb +37 -0
  21. data/lib/roo/excelx/sheet.rb +107 -0
  22. data/lib/roo/excelx/sheet_doc.rb +200 -0
  23. data/lib/roo/excelx/styles.rb +64 -0
  24. data/lib/roo/excelx/workbook.rb +59 -0
  25. data/lib/roo/excelx.rb +413 -597
  26. data/lib/roo/font.rb +17 -0
  27. data/lib/roo/libre_office.rb +5 -0
  28. data/lib/roo/link.rb +15 -0
  29. data/lib/roo/{openoffice.rb → open_office.rb} +681 -496
  30. data/lib/roo/spreadsheet.rb +20 -23
  31. data/lib/roo/utils.rb +78 -0
  32. data/lib/roo/version.rb +3 -0
  33. data/lib/roo.rb +18 -24
  34. data/roo.gemspec +20 -204
  35. data/spec/lib/roo/base_spec.rb +1 -4
  36. data/spec/lib/roo/csv_spec.rb +21 -13
  37. data/spec/lib/roo/excelx/format_spec.rb +7 -6
  38. data/spec/lib/roo/excelx_spec.rb +424 -11
  39. data/spec/lib/roo/libreoffice_spec.rb +16 -6
  40. data/spec/lib/roo/openoffice_spec.rb +13 -8
  41. data/spec/lib/roo/spreadsheet_spec.rb +40 -12
  42. data/spec/lib/roo/utils_spec.rb +106 -0
  43. data/spec/spec_helper.rb +2 -1
  44. data/test/test_generic_spreadsheet.rb +117 -139
  45. data/test/test_helper.rb +9 -56
  46. data/test/test_roo.rb +274 -478
  47. metadata +65 -303
  48. data/CHANGELOG +0 -417
  49. data/Gemfile.lock +0 -78
  50. data/README.markdown +0 -126
  51. data/VERSION +0 -1
  52. data/lib/roo/excel.rb +0 -355
  53. data/lib/roo/excel2003xml.rb +0 -300
  54. data/lib/roo/google.rb +0 -292
  55. data/lib/roo/roo_rails_helper.rb +0 -83
  56. data/lib/roo/worksheet.rb +0 -18
  57. data/scripts/txt2html +0 -67
  58. data/spec/lib/roo/excel2003xml_spec.rb +0 -15
  59. data/spec/lib/roo/excel_spec.rb +0 -17
  60. data/spec/lib/roo/google_spec.rb +0 -64
  61. data/test/files/1900_base.xls +0 -0
  62. data/test/files/1900_base.xlsx +0 -0
  63. data/test/files/1904_base.xls +0 -0
  64. data/test/files/1904_base.xlsx +0 -0
  65. data/test/files/Bibelbund.csv +0 -3741
  66. data/test/files/Bibelbund.ods +0 -0
  67. data/test/files/Bibelbund.xls +0 -0
  68. data/test/files/Bibelbund.xlsx +0 -0
  69. data/test/files/Bibelbund.xml +0 -62518
  70. data/test/files/Bibelbund1.ods +0 -0
  71. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  72. data/test/files/bad_excel_date.xls +0 -0
  73. data/test/files/bbu.ods +0 -0
  74. data/test/files/bbu.xls +0 -0
  75. data/test/files/bbu.xlsx +0 -0
  76. data/test/files/bbu.xml +0 -152
  77. data/test/files/bode-v1.ods.zip +0 -0
  78. data/test/files/bode-v1.xls.zip +0 -0
  79. data/test/files/boolean.csv +0 -2
  80. data/test/files/boolean.ods +0 -0
  81. data/test/files/boolean.xls +0 -0
  82. data/test/files/boolean.xlsx +0 -0
  83. data/test/files/boolean.xml +0 -112
  84. data/test/files/borders.ods +0 -0
  85. data/test/files/borders.xls +0 -0
  86. data/test/files/borders.xlsx +0 -0
  87. data/test/files/borders.xml +0 -144
  88. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  89. data/test/files/bug-row-column-fixnum-float.xls +0 -0
  90. data/test/files/bug-row-column-fixnum-float.xml +0 -127
  91. data/test/files/comments.ods +0 -0
  92. data/test/files/comments.xls +0 -0
  93. data/test/files/comments.xlsx +0 -0
  94. data/test/files/csvtypes.csv +0 -1
  95. data/test/files/datetime.ods +0 -0
  96. data/test/files/datetime.xls +0 -0
  97. data/test/files/datetime.xlsx +0 -0
  98. data/test/files/datetime.xml +0 -142
  99. data/test/files/datetime_floatconv.xls +0 -0
  100. data/test/files/datetime_floatconv.xml +0 -148
  101. data/test/files/dreimalvier.ods +0 -0
  102. data/test/files/emptysheets.ods +0 -0
  103. data/test/files/emptysheets.xls +0 -0
  104. data/test/files/emptysheets.xlsx +0 -0
  105. data/test/files/emptysheets.xml +0 -105
  106. data/test/files/excel2003.xml +0 -21140
  107. data/test/files/false_encoding.xls +0 -0
  108. data/test/files/false_encoding.xml +0 -132
  109. data/test/files/file_item_error.xlsx +0 -0
  110. data/test/files/formula.ods +0 -0
  111. data/test/files/formula.xls +0 -0
  112. data/test/files/formula.xlsx +0 -0
  113. data/test/files/formula.xml +0 -134
  114. data/test/files/formula_parse_error.xls +0 -0
  115. data/test/files/formula_parse_error.xml +0 -1833
  116. data/test/files/formula_string_error.xlsx +0 -0
  117. data/test/files/html-escape.ods +0 -0
  118. data/test/files/link.xls +0 -0
  119. data/test/files/link.xlsx +0 -0
  120. data/test/files/matrix.ods +0 -0
  121. data/test/files/matrix.xls +0 -0
  122. data/test/files/named_cells.ods +0 -0
  123. data/test/files/named_cells.xls +0 -0
  124. data/test/files/named_cells.xlsx +0 -0
  125. data/test/files/no_spreadsheet_file.txt +0 -1
  126. data/test/files/numbers1.csv +0 -18
  127. data/test/files/numbers1.ods +0 -0
  128. data/test/files/numbers1.xls +0 -0
  129. data/test/files/numbers1.xlsx +0 -0
  130. data/test/files/numbers1.xml +0 -312
  131. data/test/files/numeric-link.xlsx +0 -0
  132. data/test/files/only_one_sheet.ods +0 -0
  133. data/test/files/only_one_sheet.xls +0 -0
  134. data/test/files/only_one_sheet.xlsx +0 -0
  135. data/test/files/only_one_sheet.xml +0 -67
  136. data/test/files/paragraph.ods +0 -0
  137. data/test/files/paragraph.xls +0 -0
  138. data/test/files/paragraph.xlsx +0 -0
  139. data/test/files/paragraph.xml +0 -127
  140. data/test/files/prova.xls +0 -0
  141. data/test/files/ric.ods +0 -0
  142. data/test/files/simple_spreadsheet.ods +0 -0
  143. data/test/files/simple_spreadsheet.xls +0 -0
  144. data/test/files/simple_spreadsheet.xlsx +0 -0
  145. data/test/files/simple_spreadsheet.xml +0 -225
  146. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  147. data/test/files/simple_spreadsheet_from_italo.xls +0 -0
  148. data/test/files/simple_spreadsheet_from_italo.xml +0 -242
  149. data/test/files/so_datetime.csv +0 -7
  150. data/test/files/style.ods +0 -0
  151. data/test/files/style.xls +0 -0
  152. data/test/files/style.xlsx +0 -0
  153. data/test/files/style.xml +0 -154
  154. data/test/files/time-test.csv +0 -2
  155. data/test/files/time-test.ods +0 -0
  156. data/test/files/time-test.xls +0 -0
  157. data/test/files/time-test.xlsx +0 -0
  158. data/test/files/time-test.xml +0 -131
  159. data/test/files/type_excel.ods +0 -0
  160. data/test/files/type_excel.xlsx +0 -0
  161. data/test/files/type_excelx.ods +0 -0
  162. data/test/files/type_excelx.xls +0 -0
  163. data/test/files/type_openoffice.xls +0 -0
  164. data/test/files/type_openoffice.xlsx +0 -0
  165. data/test/files/whitespace.ods +0 -0
  166. data/test/files/whitespace.xls +0 -0
  167. data/test/files/whitespace.xlsx +0 -0
  168. data/test/files/whitespace.xml +0 -184
  169. data/test/rm_sub_test.rb +0 -12
  170. data/test/rm_test.rb +0 -7
  171. data/website/index.html +0 -385
  172. data/website/index.txt +0 -423
  173. data/website/javascripts/rounded_corners_lite.inc.js +0 -285
  174. data/website/stylesheets/screen.css +0 -130
  175. data/website/template.rhtml +0 -48
data/lib/roo/excelx.rb CHANGED
@@ -1,674 +1,490 @@
1
- require 'date'
2
1
  require 'nokogiri'
3
- require 'spreadsheet'
4
-
5
- class Roo::Excelx < Roo::Base
6
- module Format
7
- EXCEPTIONAL_FORMATS = {
8
- 'h:mm am/pm' => :date,
9
- 'h:mm:ss am/pm' => :date,
10
- }
11
-
12
- STANDARD_FORMATS = {
13
- 0 => 'General',
14
- 1 => '0',
15
- 2 => '0.00',
16
- 3 => '#,##0',
17
- 4 => '#,##0.00',
18
- 9 => '0%',
19
- 10 => '0.00%',
20
- 11 => '0.00E+00',
21
- 12 => '# ?/?',
22
- 13 => '# ??/??',
23
- 14 => 'mm-dd-yy',
24
- 15 => 'd-mmm-yy',
25
- 16 => 'd-mmm',
26
- 17 => 'mmm-yy',
27
- 18 => 'h:mm AM/PM',
28
- 19 => 'h:mm:ss AM/PM',
29
- 20 => 'h:mm',
30
- 21 => 'h:mm:ss',
31
- 22 => 'm/d/yy h:mm',
32
- 37 => '#,##0 ;(#,##0)',
33
- 38 => '#,##0 ;[Red](#,##0)',
34
- 39 => '#,##0.00;(#,##0.00)',
35
- 40 => '#,##0.00;[Red](#,##0.00)',
36
- 45 => 'mm:ss',
37
- 46 => '[h]:mm:ss',
38
- 47 => 'mmss.0',
39
- 48 => '##0.0E+0',
40
- 49 => '@',
41
- }
42
-
43
- def to_type(format)
44
- format = format.to_s.downcase
45
- if type = EXCEPTIONAL_FORMATS[format]
46
- type
47
- elsif format.include?('#')
48
- :float
49
- elsif format.include?('d') || format.include?('y')
50
- if format.include?('h') || format.include?('s')
51
- :datetime
2
+ require 'zip/filesystem'
3
+ require 'roo/link'
4
+ require 'roo/utils'
5
+
6
+ module Roo
7
+ class Excelx < Roo::Base
8
+ require 'roo/excelx/workbook'
9
+ require 'roo/excelx/shared_strings'
10
+ require 'roo/excelx/styles'
11
+ require 'roo/excelx/cell'
12
+ require 'roo/excelx/sheet'
13
+ require 'roo/excelx/relationships'
14
+ require 'roo/excelx/comments'
15
+ require 'roo/excelx/sheet_doc'
16
+
17
+ module Format
18
+ EXCEPTIONAL_FORMATS = {
19
+ 'h:mm am/pm' => :date,
20
+ 'h:mm:ss am/pm' => :date
21
+ }
22
+
23
+ STANDARD_FORMATS = {
24
+ 0 => 'General'.freeze,
25
+ 1 => '0'.freeze,
26
+ 2 => '0.00'.freeze,
27
+ 3 => '#,##0'.freeze,
28
+ 4 => '#,##0.00'.freeze,
29
+ 9 => '0%'.freeze,
30
+ 10 => '0.00%'.freeze,
31
+ 11 => '0.00E+00'.freeze,
32
+ 12 => '# ?/?'.freeze,
33
+ 13 => '# ??/??'.freeze,
34
+ 14 => 'mm-dd-yy'.freeze,
35
+ 15 => 'd-mmm-yy'.freeze,
36
+ 16 => 'd-mmm'.freeze,
37
+ 17 => 'mmm-yy'.freeze,
38
+ 18 => 'h:mm AM/PM'.freeze,
39
+ 19 => 'h:mm:ss AM/PM'.freeze,
40
+ 20 => 'h:mm'.freeze,
41
+ 21 => 'h:mm:ss'.freeze,
42
+ 22 => 'm/d/yy h:mm'.freeze,
43
+ 37 => '#,##0 ;(#,##0)'.freeze,
44
+ 38 => '#,##0 ;[Red](#,##0)'.freeze,
45
+ 39 => '#,##0.00;(#,##0.00)'.freeze,
46
+ 40 => '#,##0.00;[Red](#,##0.00)'.freeze,
47
+ 45 => 'mm:ss'.freeze,
48
+ 46 => '[h]:mm:ss'.freeze,
49
+ 47 => 'mmss.0'.freeze,
50
+ 48 => '##0.0E+0'.freeze,
51
+ 49 => '@'.freeze
52
+ }
53
+
54
+ def to_type(format)
55
+ format = format.to_s.downcase
56
+ if (type = EXCEPTIONAL_FORMATS[format])
57
+ type
58
+ elsif format.include?('#')
59
+ :float
60
+ elsif !format.match(/d+(?![\]])/).nil? || format.include?('y')
61
+ if format.include?('h') || format.include?('s')
62
+ :datetime
63
+ else
64
+ :date
65
+ end
66
+ elsif format.include?('h') || format.include?('s')
67
+ :time
68
+ elsif format.include?('%')
69
+ :percentage
52
70
  else
53
- :date
71
+ :float
54
72
  end
55
- elsif format.include?('h') || format.include?('s')
56
- :time
57
- elsif format.include?('%')
58
- :percentage
59
- else
60
- :float
61
73
  end
74
+
75
+ module_function :to_type
62
76
  end
63
77
 
64
- module_function :to_type
65
- end
78
+ ExceedsMaxError = Class.new(StandardError)
66
79
 
67
- # initialization and opening of a spreadsheet file
68
- # values for packed: :zip
69
- def initialize(filename, options = {}, deprecated_file_warning = :error)
70
- if Hash === options
80
+ # initialization and opening of a spreadsheet file
81
+ # values for packed: :zip
82
+ # optional cell_max (int) parameter for early aborting attempts to parse
83
+ # enormous documents.
84
+ def initialize(filename_or_stream, options = {})
71
85
  packed = options[:packed]
72
- file_warning = options[:file_warning] || :error
73
- else
74
- warn 'Supplying `packed` or `file_warning` as separate arguments to `Roo::Excelx.new` is deprecated. Use an options hash instead.'
75
- packed = options
76
- file_warning = deprecated_file_warning
77
- end
78
-
79
- file_type_check(filename,'.xlsx','an Excel-xlsx', file_warning, packed)
80
- make_tmpdir do |tmpdir|
81
- filename = download_uri(filename, tmpdir) if uri?(filename)
82
- filename = unzip(filename, tmpdir) if packed == :zip
83
- @filename = filename
84
- unless File.file?(@filename)
85
- raise IOError, "file #{@filename} does not exist"
86
- end
87
- @comments_files = Array.new
88
- @rels_files = Array.new
89
- extract_content(tmpdir, @filename)
90
- @workbook_doc = load_xml(File.join(tmpdir, "roo_workbook.xml"))
91
- @shared_table = []
92
- if File.exist?(File.join(tmpdir, 'roo_sharedStrings.xml'))
93
- @sharedstring_doc = load_xml(File.join(tmpdir, 'roo_sharedStrings.xml'))
94
- read_shared_strings(@sharedstring_doc)
86
+ file_warning = options.fetch(:file_warning, :error)
87
+ cell_max = options.delete(:cell_max)
88
+ sheet_options = {}
89
+ sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
90
+
91
+ unless is_stream?(filename_or_stream)
92
+ file_type_check(filename_or_stream, '.xlsx', 'an Excel-xlsx', file_warning, packed)
93
+ basename = File.basename(filename_or_stream)
95
94
  end
96
- @styles_table = []
97
- @style_definitions = Array.new # TODO: ??? { |h,k| h[k] = {} }
98
- if File.exist?(File.join(tmpdir, 'roo_styles.xml'))
99
- @styles_doc = load_xml(File.join(tmpdir, 'roo_styles.xml'))
100
- read_styles(@styles_doc)
95
+
96
+ @tmpdir = make_tmpdir(basename, options[:tmpdir_root])
97
+ @filename = local_filename(filename_or_stream, @tmpdir, packed)
98
+ @comments_files = []
99
+ @rels_files = []
100
+ process_zipfile(@filename || filename_or_stream)
101
+
102
+ @sheet_names = workbook.sheets.map do |sheet|
103
+ unless options[:only_visible_sheets] && sheet['state'] == 'hidden'
104
+ sheet['name']
105
+ end
106
+ end.compact
107
+ @sheets = []
108
+ @sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n|
109
+ @sheets[n] = Sheet.new(sheet_name, @rels_files[n], @sheet_files[n], @comments_files[n], styles, shared_strings, workbook, sheet_options)
110
+ [sheet_name, @sheets[n]]
111
+ end]
112
+
113
+ if cell_max
114
+ cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions)
115
+ raise ExceedsMaxError.new("Excel file exceeds cell maximum: #{cell_count} > #{cell_max}") if cell_count > cell_max
101
116
  end
102
- @sheet_doc = load_xmls(@sheet_files)
103
- @comments_doc = load_xmls(@comments_files)
104
- @rels_doc = load_xmls(@rels_files)
105
- end
106
- super(filename, options)
107
- @formula = Hash.new
108
- @excelx_type = Hash.new
109
- @excelx_value = Hash.new
110
- @s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt
111
- @comment = Hash.new
112
- @comments_read = Hash.new
113
- @hyperlink = Hash.new
114
- @hyperlinks_read = Hash.new
115
- end
116
117
 
117
- def method_missing(m,*args)
118
- # is method name a label name
119
- read_labels
120
- if @label.has_key?(m.to_s)
121
- sheet ||= @default_sheet
122
- read_cells(sheet)
123
- row,col = label(m.to_s)
124
- cell(row,col)
125
- else
126
- # call super for methods like #a1
127
118
  super
119
+ rescue => e # clean up any temp files, but only if an error was raised
120
+ close
121
+ raise e
128
122
  end
129
- end
130
123
 
131
- # Returns the content of a spreadsheet-cell.
132
- # (1,1) is the upper left corner.
133
- # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
134
- # cell at the first line and first row.
135
- def cell(row, col, sheet=nil)
136
- sheet ||= @default_sheet
137
- read_cells(sheet)
138
- row,col = normalize(row,col)
139
- if celltype(row,col,sheet) == :date
140
- yyyy,mm,dd = @cell[sheet][[row,col]].split('-')
141
- return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
142
- elsif celltype(row,col,sheet) == :datetime
143
- date_part,time_part = @cell[sheet][[row,col]].split(' ')
144
- yyyy,mm,dd = date_part.split('-')
145
- hh,mi,ss = time_part.split(':')
146
- return DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i)
147
- end
148
- @cell[sheet][[row,col]]
149
- end
124
+ def method_missing(method, *args)
125
+ if (label = workbook.defined_names[method.to_s])
126
+ safe_send(sheet_for(label.sheet).cells[label.key], :value)
127
+ else
128
+ # call super for methods like #a1
129
+ super
130
+ end
131
+ end
150
132
 
151
- # Returns the formula at (row,col).
152
- # Returns nil if there is no formula.
153
- # The method #formula? checks if there is a formula.
154
- def formula(row,col,sheet=nil)
155
- sheet ||= @default_sheet
156
- read_cells(sheet)
157
- row,col = normalize(row,col)
158
- @formula[sheet][[row,col]] && @formula[sheet][[row,col]]
159
- end
160
- alias_method :formula?, :formula
161
-
162
- # returns each formula in the selected sheet as an array of elements
163
- # [row, col, formula]
164
- def formulas(sheet=nil)
165
- sheet ||= @default_sheet
166
- read_cells(sheet)
167
- if @formula[sheet]
168
- @formula[sheet].each.collect do |elem|
169
- [elem[0][0], elem[0][1], elem[1]]
133
+ def sheets
134
+ @sheet_names
135
+ end
136
+
137
+ def sheet_for(sheet)
138
+ sheet ||= default_sheet
139
+ validate_sheet!(sheet)
140
+ @sheets_by_name[sheet]
141
+ end
142
+
143
+ # Returns the content of a spreadsheet-cell.
144
+ # (1,1) is the upper left corner.
145
+ # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
146
+ # cell at the first line and first row.
147
+ def cell(row, col, sheet = nil)
148
+ key = normalize(row, col)
149
+ safe_send(sheet_for(sheet).cells[key], :value)
150
+ end
151
+
152
+ def row(rownumber, sheet = nil)
153
+ sheet_for(sheet).row(rownumber)
154
+ end
155
+
156
+ # returns all values in this column as an array
157
+ # column numbers are 1,2,3,... like in the spreadsheet
158
+ def column(column_number, sheet = nil)
159
+ if column_number.is_a?(::String)
160
+ column_number = ::Roo::Utils.letter_to_number(column_number)
170
161
  end
171
- else
172
- []
162
+ sheet_for(sheet).column(column_number)
173
163
  end
174
- end
175
164
 
176
- class Font
177
- attr_accessor :bold, :italic, :underline
165
+ # returns the number of the first non-empty row
166
+ def first_row(sheet = nil)
167
+ sheet_for(sheet).first_row
168
+ end
178
169
 
179
- def bold?
180
- @bold == true
170
+ # returns the number of the last non-empty row
171
+ def last_row(sheet = nil)
172
+ sheet_for(sheet).last_row
181
173
  end
182
174
 
183
- def italic?
184
- @italic == true
175
+ # returns the number of the first non-empty column
176
+ def first_column(sheet = nil)
177
+ sheet_for(sheet).first_column
185
178
  end
186
179
 
187
- def underline?
188
- @underline == true
180
+ # returns the number of the last non-empty column
181
+ def last_column(sheet = nil)
182
+ sheet_for(sheet).last_column
189
183
  end
190
- end
191
184
 
192
- # Given a cell, return the cell's style
193
- def font(row, col, sheet=nil)
194
- sheet ||= @default_sheet
195
- read_cells(sheet)
196
- row,col = normalize(row,col)
197
- s_attribute = @s_attribute[sheet][[row,col]]
198
- s_attribute ||= 0
199
- s_attribute = s_attribute.to_i
200
- @style_definitions[s_attribute]
201
- end
185
+ # set a cell to a certain value
186
+ # (this will not be saved back to the spreadsheet file!)
187
+ def set(row, col, value, sheet = nil) #:nodoc:
188
+ key = normalize(row, col)
189
+ cell_type = cell_type_by_value(value)
190
+ sheet_for(sheet).cells[key] = Cell.new(value, cell_type, nil, cell_type, value, nil, nil, nil, Cell::Coordinate.new(row, col))
191
+ end
202
192
 
203
- # returns the type of a cell:
204
- # * :float
205
- # * :string,
206
- # * :date
207
- # * :percentage
208
- # * :formula
209
- # * :time
210
- # * :datetime
211
- def celltype(row,col,sheet=nil)
212
- sheet ||= @default_sheet
213
- read_cells(sheet)
214
- row,col = normalize(row,col)
215
- if @formula[sheet][[row,col]]
216
- return :formula
217
- else
218
- @cell_type[sheet][[row,col]]
193
+ # Returns the formula at (row,col).
194
+ # Returns nil if there is no formula.
195
+ # The method #formula? checks if there is a formula.
196
+ def formula(row, col, sheet = nil)
197
+ key = normalize(row, col)
198
+ safe_send(sheet_for(sheet).cells[key], :formula)
219
199
  end
220
- end
221
200
 
222
- # returns the internal type of an excel cell
223
- # * :numeric_or_formula
224
- # * :string
225
- # Note: this is only available within the Excelx class
226
- def excelx_type(row,col,sheet=nil)
227
- sheet ||= @default_sheet
228
- read_cells(sheet)
229
- row,col = normalize(row,col)
230
- return @excelx_type[sheet][[row,col]]
231
- end
201
+ # Predicate methods really should return a boolean
202
+ # value. Hopefully no one was relying on the fact that this
203
+ # previously returned either nil/formula
204
+ def formula?(*args)
205
+ !!formula(*args)
206
+ end
232
207
 
233
- # returns the internal value of an excelx cell
234
- # Note: this is only available within the Excelx class
235
- def excelx_value(row,col,sheet=nil)
236
- sheet ||= @default_sheet
237
- read_cells(sheet)
238
- row,col = normalize(row,col)
239
- return @excelx_value[sheet][[row,col]]
240
- end
208
+ # returns each formula in the selected sheet as an array of tuples in following format
209
+ # [[row, col, formula], [row, col, formula],...]
210
+ def formulas(sheet = nil)
211
+ sheet_for(sheet).cells.select { |_, cell| cell.formula }.map do |(x, y), cell|
212
+ [x, y, cell.formula]
213
+ end
214
+ end
241
215
 
242
- # returns the internal format of an excel cell
243
- def excelx_format(row,col,sheet=nil)
244
- sheet ||= @default_sheet
245
- read_cells(sheet)
246
- row,col = normalize(row,col)
247
- s = @s_attribute[sheet][[row,col]]
248
- attribute2format(s).to_s
249
- end
216
+ # Given a cell, return the cell's style
217
+ def font(row, col, sheet = nil)
218
+ key = normalize(row, col)
219
+ definition_index = safe_send(sheet_for(sheet).cells[key], :style)
220
+ styles.definitions[definition_index] if definition_index
221
+ end
250
222
 
251
- # returns an array of sheet names in the spreadsheet
252
- def sheets
253
- @workbook_doc.xpath("//xmlns:sheet").map do |sheet|
254
- sheet['name']
223
+ # returns the type of a cell:
224
+ # * :float
225
+ # * :string,
226
+ # * :date
227
+ # * :percentage
228
+ # * :formula
229
+ # * :time
230
+ # * :datetime
231
+ def celltype(row, col, sheet = nil)
232
+ key = normalize(row, col)
233
+ safe_send(sheet_for(sheet).cells[key], :type)
255
234
  end
256
- end
257
235
 
258
- # shows the internal representation of all cells
259
- # for debugging purposes
260
- def to_s(sheet=nil)
261
- sheet ||= @default_sheet
262
- read_cells(sheet)
263
- @cell[sheet].inspect
264
- end
236
+ # returns the internal type of an excel cell
237
+ # * :numeric_or_formula
238
+ # * :string
239
+ # Note: this is only available within the Excelx class
240
+ def excelx_type(row, col, sheet = nil)
241
+ key = normalize(row, col)
242
+ safe_send(sheet_for(sheet).cells[key], :excelx_type)
243
+ end
265
244
 
266
- # returns the row,col values of the labelled cell
267
- # (nil,nil) if label is not defined
268
- def label(labelname)
269
- read_labels
270
- if @label.empty? || !@label.has_key?(labelname)
271
- return nil,nil,nil
272
- else
273
- return @label[labelname][1].to_i,
274
- Roo::Base.letter_to_number(@label[labelname][2]),
275
- @label[labelname][0]
245
+ # returns the internal value of an excelx cell
246
+ # Note: this is only available within the Excelx class
247
+ def excelx_value(row, col, sheet = nil)
248
+ key = normalize(row, col)
249
+ safe_send(sheet_for(sheet).cells[key], :excelx_value)
276
250
  end
277
- end
278
251
 
279
- # Returns an array which all labels. Each element is an array with
280
- # [labelname, [row,col,sheetname]]
281
- def labels
282
- # sheet ||= @default_sheet
283
- # read_cells(sheet)
284
- read_labels
285
- @label.map do |label|
286
- [ label[0], # name
287
- [ label[1][1].to_i, # row
288
- Roo::Base.letter_to_number(label[1][2]), # column
289
- label[1][0], # sheet
290
- ] ]
252
+ # returns the internal format of an excel cell
253
+ def excelx_format(row, col, sheet = nil)
254
+ key = normalize(row, col)
255
+ sheet_for(sheet).excelx_format(key)
291
256
  end
292
- end
293
257
 
294
- def hyperlink?(row,col,sheet=nil)
295
- hyperlink(row, col, sheet) != nil
296
- end
258
+ def empty?(row, col, sheet = nil)
259
+ sheet = sheet_for(sheet)
260
+ key = normalize(row, col)
261
+ cell = sheet.cells[key]
262
+ !cell || !cell.value || (cell.type == :string && cell.value.empty?) \
263
+ || (row < sheet.first_row || row > sheet.last_row || col < sheet.first_column || col > sheet.last_column)
264
+ end
297
265
 
298
- # returns the hyperlink at (row/col)
299
- # nil if there is no hyperlink
300
- def hyperlink(row,col,sheet=nil)
301
- sheet ||= @default_sheet
302
- read_hyperlinks(sheet) unless @hyperlinks_read[sheet]
303
- row,col = normalize(row,col)
304
- return nil unless @hyperlink[sheet]
305
- @hyperlink[sheet][[row,col]]
306
- end
266
+ # shows the internal representation of all cells
267
+ # for debugging purposes
268
+ def to_s(sheet = nil)
269
+ sheet_for(sheet).cells.inspect
270
+ end
307
271
 
308
- # returns the comment at (row/col)
309
- # nil if there is no comment
310
- def comment(row,col,sheet=nil)
311
- sheet ||= @default_sheet
312
- #read_cells(sheet)
313
- read_comments(sheet) unless @comments_read[sheet]
314
- row,col = normalize(row,col)
315
- return nil unless @comment[sheet]
316
- @comment[sheet][[row,col]]
317
- end
272
+ # returns the row,col values of the labelled cell
273
+ # (nil,nil) if label is not defined
274
+ def label(name)
275
+ labels = workbook.defined_names
276
+ return [nil, nil, nil] if labels.empty? || !labels.key?(name)
318
277
 
319
- # true, if there is a comment
320
- def comment?(row,col,sheet=nil)
321
- sheet ||= @default_sheet
322
- # read_cells(sheet)
323
- read_comments(sheet) unless @comments_read[sheet]
324
- row,col = normalize(row,col)
325
- comment(row,col) != nil
326
- end
278
+ [labels[name].row, labels[name].col, labels[name].sheet]
279
+ end
327
280
 
328
- # returns each comment in the selected sheet as an array of elements
329
- # [row, col, comment]
330
- def comments(sheet=nil)
331
- sheet ||= @default_sheet
332
- read_comments(sheet) unless @comments_read[sheet]
333
- if @comment[sheet]
334
- @comment[sheet].each.collect do |elem|
335
- [elem[0][0],elem[0][1],elem[1]]
281
+ # Returns an array which all labels. Each element is an array with
282
+ # [labelname, [row,col,sheetname]]
283
+ def labels
284
+ @labels ||= workbook.defined_names.map do |name, label|
285
+ [
286
+ name,
287
+ [label.row, label.col, label.sheet]
288
+ ]
336
289
  end
337
- else
338
- []
339
290
  end
340
- end
341
291
 
342
- private
292
+ def hyperlink?(row, col, sheet = nil)
293
+ !!hyperlink(row, col, sheet)
294
+ end
343
295
 
344
- def load_xmls(paths)
345
- paths.compact.map do |item|
346
- load_xml(item)
296
+ # returns the hyperlink at (row/col)
297
+ # nil if there is no hyperlink
298
+ def hyperlink(row, col, sheet = nil)
299
+ key = normalize(row, col)
300
+ sheet_for(sheet).hyperlinks[key]
347
301
  end
348
- end
349
302
 
350
- # helper function to set the internal representation of cells
351
- def set_cell_values(sheet,x,y,i,v,value_type,formula,
352
- excelx_type=nil,
353
- excelx_value=nil,
354
- s_attribute=nil)
355
- key = [y,x+i]
356
- @cell_type[sheet] ||= {}
357
- @cell_type[sheet][key] = value_type
358
- @formula[sheet] ||= {}
359
- @formula[sheet][key] = formula if formula
360
- @cell[sheet] ||= {}
361
- @cell[sheet][key] =
362
- case @cell_type[sheet][key]
363
- when :float
364
- v.to_f
365
- when :string
366
- v
367
- when :date
368
- (base_date+v.to_i).strftime("%Y-%m-%d")
369
- when :datetime
370
- (base_date+v.to_f).strftime("%Y-%m-%d %H:%M:%S")
371
- when :percentage
372
- v.to_f
373
- when :time
374
- v.to_f*(24*60*60)
375
- else
376
- v
377
- end
303
+ # returns the comment at (row/col)
304
+ # nil if there is no comment
305
+ def comment(row, col, sheet = nil)
306
+ key = normalize(row, col)
307
+ sheet_for(sheet).comments[key]
308
+ end
378
309
 
379
- @cell[sheet][key] = Spreadsheet::Link.new(@hyperlink[sheet][key], @cell[sheet][key].to_s) if hyperlink?(y,x+i)
380
- @excelx_type[sheet] ||= {}
381
- @excelx_type[sheet][key] = excelx_type
382
- @excelx_value[sheet] ||= {}
383
- @excelx_value[sheet][key] = excelx_value
384
- @s_attribute[sheet] ||= {}
385
- @s_attribute[sheet][key] = s_attribute
386
- end
310
+ # true, if there is a comment
311
+ def comment?(row, col, sheet = nil)
312
+ !!comment(row, col, sheet)
313
+ end
387
314
 
388
- # read all cells in the selected sheet
389
- def read_cells(sheet=nil)
390
- sheet ||= @default_sheet
391
- validate_sheet!(sheet)
392
- return if @cells_read[sheet]
393
-
394
- @sheet_doc[sheets.index(sheet)].xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row/xmlns:c").each do |c|
395
- s_attribute = c['s'].to_i # should be here
396
- # c: <c r="A5" s="2">
397
- # <v>22606</v>
398
- # </c>, format: , tmp_type: float
399
- value_type =
400
- case c['t']
401
- when 's'
402
- :shared
403
- when 'b'
404
- :boolean
405
- # 2011-02-25 BEGIN
406
- when 'str'
407
- :string
408
- # 2011-02-25 END
409
- # 2011-09-15 BEGIN
410
- when 'inlineStr'
411
- :inlinestr
412
- # 2011-09-15 END
413
- else
414
- format = attribute2format(s_attribute)
415
- Format.to_type(format)
416
- end
417
- formula = nil
418
- c.children.each do |cell|
419
- case cell.name
420
- when 'is'
421
- cell.children.each do |is|
422
- if is.name == 't'
423
- inlinestr_content = is.content
424
- value_type = :string
425
- v = inlinestr_content
426
- excelx_type = :string
427
- y, x = Roo::Base.split_coordinate(c['r'])
428
- excelx_value = inlinestr_content #cell.content
429
- set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute)
430
- end
431
- end
432
- when 'f'
433
- formula = cell.content
434
- when 'v'
435
- if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0
436
- value_type =
437
- if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
438
- :datetime
439
- else
440
- :date
441
- end
442
- end
443
- excelx_type = [:numeric_or_formula,format.to_s]
444
- excelx_value = cell.content
445
- v =
446
- case value_type
447
- when :shared
448
- value_type = :string
449
- excelx_type = :string
450
- @shared_table[cell.content.to_i]
451
- when :boolean
452
- (cell.content.to_i == 1 ? 'TRUE' : 'FALSE')
453
- when :date
454
- cell.content
455
- when :time
456
- cell.content
457
- when :datetime
458
- cell.content
459
- when :formula
460
- cell.content.to_f #TODO: !!!!
461
- when :string
462
- excelx_type = :string
463
- cell.content
464
- else
465
- value_type = :float
466
- cell.content
467
- end
468
- y, x = Roo::Base.split_coordinate(c['r'])
469
- set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute)
470
- end
315
+ def comments(sheet = nil)
316
+ sheet_for(sheet).comments.map do |(x, y), comment|
317
+ [x, y, comment]
471
318
  end
472
319
  end
473
- @cells_read[sheet] = true
474
- # begin comments
475
- =begin
476
- Datei xl/comments1.xml
477
- <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
478
- <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
479
- <authors>
480
- <author />
481
- </authors>
482
- <commentList>
483
- <comment ref="B4" authorId="0">
484
- <text>
485
- <r>
486
- <rPr>
487
- <sz val="10" />
488
- <rFont val="Arial" />
489
- <family val="2" />
490
- </rPr>
491
- <t>Kommentar fuer B4</t>
492
- </r>
493
- </text>
494
- </comment>
495
- <comment ref="B5" authorId="0">
496
- <text>
497
- <r>
498
- <rPr>
499
- <sz val="10" />
500
- <rFont val="Arial" />
501
- <family val="2" />
502
- </rPr>
503
- <t>Kommentar fuer B5</t>
504
- </r>
505
- </text>
506
- </comment>
507
- </commentList>
508
- </comments>
509
- =end
510
- =begin
511
- if @comments_doc[self.sheets.index(sheet)]
512
- read_comments(sheet)
513
- end
514
- =end
515
- #end comments
516
- end
517
320
 
518
- # Reads all comments from a sheet
519
- def read_comments(sheet=nil)
520
- sheet ||= @default_sheet
521
- validate_sheet!(sheet)
522
- n = self.sheets.index(sheet)
523
- return unless @comments_doc[n] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
524
- @comments_doc[n].xpath("//xmlns:comments/xmlns:commentList/xmlns:comment").each do |comment|
525
- ref = comment.attributes['ref'].to_s
526
- row,col = Roo::Base.split_coordinate(ref)
527
- comment.xpath('./xmlns:text/xmlns:r/xmlns:t').each do |text|
528
- @comment[sheet] ||= {}
529
- @comment[sheet][[row,col]] = text.text
530
- end
321
# Yields each row of a sheet (an array of Excelx::Cell) to the given block.
#
# options - Hash of options (default: {}). The :sheet entry selects the sheet
#           through sheet_for; every other entry (e.g. pad_cells, max_rows)
#           is passed on to the sheet's each_row.
#
# The caller's hash is left untouched: :sheet is removed from a copy only.
def each_row_streaming(options = {})
  row_options = options.dup
  sheet = sheet_for(row_options.delete(:sheet))
  sheet.each_row(row_options) { |row| yield row }
end
532
- @comments_read[sheet] = true
533
- end
534
326
 
535
- # Reads all hyperlinks from a sheet
536
- def read_hyperlinks(sheet=nil)
537
- sheet ||= @default_sheet
538
- validate_sheet!(sheet)
539
- n = self.sheets.index(sheet)
540
- if rels_doc = @rels_doc[n]
541
- rels = Hash[rels_doc.xpath("/xmlns:Relationships/xmlns:Relationship").map do |r|
542
- [r.attribute('Id').text, r]
543
- end]
544
- @sheet_doc[n].xpath("/xmlns:worksheet/xmlns:hyperlinks/xmlns:hyperlink").each do |h|
545
- if rel_element = rels[h.attribute('id').text]
546
- row,col = Roo::Base.split_coordinate(h.attributes['ref'].to_s)
547
- @hyperlink[sheet] ||= {}
548
- @hyperlink[sheet][[row,col]] = rel_element.attribute('Target').text
549
- end
327
+ private
328
+
329
# Runs sanitize_value over every String cell value of the given sheet and
# records the sheet in @cleaned.
#
# sheet - key into @sheets_by_name.
#
# Non-String values are left as they are.
def clean_sheet(sheet)
  @sheets_by_name[sheet].cells.each_pair do |coord, cell|
    if cell.value.is_a?(::String)
      @sheets_by_name[sheet].cells[coord].value = sanitize_value(cell.value)
    end
  end

  @cleaned[sheet] = true
end
563
338
 
564
- # Extracts all needed files from the zip file
565
- def process_zipfile(tmpdir, zipfilename, zip, path='')
566
- @sheet_files = []
567
- Roo::ZipFile.open(zipfilename) {|zf|
568
- zf.entries.each {|entry|
569
- entry_name = entry.to_s.downcase
339
# Internal: extracts the worksheet ids from the workbook.xml file. xlsx
# documents require a workbook.xml file, so if the file is missing the
# archive is not a valid xlsx file. In that case an ArgumentError is raised.
#
# entries - An Enumerable of zip entries (objects responding to name and
#           extract); the first one whose name ends in "workbook.xml" is used.
# path    - A String for that entry's destination path.
#
# Examples
#
#   extract_worksheet_ids(entries, 'tmpdir/roo_workbook.xml')
#   # => ["rId1", "rId2", "rId3"]
#
# Returns an Array of Strings.
# Raises ArgumentError if no workbook.xml entry is present.
def extract_worksheet_ids(entries, path)
  wb = entries.find { |e| e.name =~ /workbook\.xml\z/ }
  # The comma is required: `fail ArgumentError 'msg'` is parsed as a call to a
  # method named ArgumentError and ends in NoMethodError.
  fail ArgumentError, 'missing required workbook file' if wb.nil?

  wb.extract(path)
  workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
  workbook_doc.xpath('//sheet').map { |s| s.attributes['id'].value }
end
570
361
 
571
- path =
572
- if entry_name.end_with?('workbook.xml')
573
- "#{tmpdir}/roo_workbook.xml"
574
- elsif entry_name.end_with?('sharedstrings.xml')
575
- "#{tmpdir}/roo_sharedStrings.xml"
576
- elsif entry_name.end_with?('styles.xml')
577
- "#{tmpdir}/roo_styles.xml"
578
- elsif entry_name =~ /sheet([0-9]+).xml$/
579
- nr = $1
580
- @sheet_files[nr.to_i-1] = "#{tmpdir}/roo_sheet#{nr}"
581
- elsif entry_name =~ /comments([0-9]+).xml$/
582
- nr = $1
583
- @comments_files[nr.to_i-1] = "#{tmpdir}/roo_comments#{nr}"
584
- elsif entry_name =~ /sheet([0-9]+).xml.rels$/
585
- nr = $1
586
- @rels_files[nr.to_i-1] = "#{tmpdir}/roo_rels#{nr}"
587
- end
588
- if path
589
- extract_file(zip, entry, path)
590
- end
591
- }
592
- }
593
- end
362
# Internal: reads workbook.xml.rels and maps the id of every worksheet
# relationship to its target.
#
# entries - An Enumerable of zip entries (objects responding to name and
#           extract); the first one whose name ends in "workbook.xml.rels"
#           is used.
# path    - A String for that entry's destination path.
#
# Examples
#
#   extract_worksheet_rels(entries, 'tmpdir/roo_workbook.xml.rels')
#   # => {
#   #      "rId1"=>"worksheets/sheet1.xml",
#   #      "rId2"=>"worksheets/sheet2.xml",
#   #      "rId3"=>"worksheets/sheet3.xml"
#   #    }
#
# Returns a Hash.
# Raises ArgumentError if no workbook.xml.rels entry is present.
def extract_worksheet_rels(entries, path)
  wb_rels = entries.find { |e| e.name =~ /workbook\.xml\.rels\z/ }
  # The comma is required: `fail ArgumentError 'msg'` is parsed as a call to a
  # method named ArgumentError and ends in NoMethodError.
  fail ArgumentError, 'missing required workbook file' if wb_rels.nil?

  wb_rels.extract(path)
  rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
  worksheet_type = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'

  # Relationships of any other type (styles, shared strings, ...) are skipped.
  rels_doc.xpath('//Relationship').each_with_object({}) do |relationship, targets|
    attributes = relationship.attributes
    next unless attributes['Type'].value == worksheet_type

    targets[attributes['Id'].value] = attributes['Target'].value
  end
end
600
396
 
601
- # extract files from the zip file
602
- def extract_content(tmpdir, zipfilename)
603
- Roo::ZipFile.open(@filename) do |zip|
604
- process_zipfile(tmpdir, zipfilename,zip)
397
# Internal: extracts the worksheet files in the order given by sheet_ids.
#
# entries   - An Enumerable of zip entries (objects responding to name and
#             extract).
# sheet_ids - An Array of relationship ids, in workbook order.
# sheets    - A Hash mapping relationship id to the worksheet target path.
# tmpdir    - A String directory the files are extracted into.
#
# The n-th id is extracted to "<tmpdir>/roo_sheet<n>" (n starting at 1) and
# that path is appended to @sheet_files.
#
# Raises ArgumentError if no entry name ends with the target of an id.
def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
  sheet_ids.each_with_index do |id, i|
    name = sheets[id]
    # The target is literal text, so escape it before building the pattern;
    # otherwise "." and other metacharacters in the path match too much.
    # NOTE(review): when sheets has no target for an id, name is nil, the
    # pattern is empty and the first entry is picked -- confirm whether
    # that fallback is intended.
    pattern = /#{Regexp.escape(name.to_s)}$/
    entry = entries.find { |e| e.name =~ pattern }
    fail ArgumentError, "missing worksheet file for #{id}" if entry.nil?

    path = "#{tmpdir}/roo_sheet#{i + 1}"
    @sheet_files << path
    entry.extract(path)
  end
end
606
- end
607
406
 
608
- # read the shared strings xml document
609
- def read_shared_strings(doc)
610
- doc.xpath("/xmlns:sst/xmlns:si").each do |si|
611
- shared_table_entry = ''
612
- si.children.each do |elem|
613
- if elem.name == 'r' and elem.children
614
- elem.children.each do |r_elem|
615
- if r_elem.name == 't'
616
- shared_table_entry << r_elem.content
617
- end
407
# Extracts all needed files from the zip file.
#
# zipfilename_or_stream - the archive to read. When is_stream? returns true
#                         it is read entry by entry through
#                         Zip::InputStream; otherwise it is opened with
#                         Zip::File and its entries are processed sorted by
#                         name.
#
# Resets @sheet_files before handing the entries to process_zipfile_entries.
def process_zipfile(zipfilename_or_stream)
  @sheet_files = []

  if is_stream?(zipfilename_or_stream)
    stream = Zip::InputStream.open zipfilename_or_stream
    begin
      entries = []
      while (entry = stream.get_next_entry)
        entries << entry
      end
      process_zipfile_entries entries
    ensure
      stream.close
    end
  else
    # Block form: the archive is closed once extraction is done, mirroring
    # the ensure/close of the stream branch.
    Zip::File.open(zipfilename_or_stream) do |zip_file|
      process_zipfile_entries zip_file.to_a.sort_by(&:name)
    end
  end
end
626
- end
627
426
 
628
- # read the styles elements of an excelx document
629
- def read_styles(doc)
630
- @cellXfs = []
631
-
632
- @numFmts = Hash[doc.xpath("//xmlns:numFmt").map do |numFmt|
633
- [numFmt['numFmtId'], numFmt['formatCode']]
634
- end]
635
- fonts = doc.xpath("//xmlns:fonts/xmlns:font").map do |font_el|
636
- Font.new.tap do |font|
637
- font.bold = !font_el.xpath('./xmlns:b').empty?
638
- font.italic = !font_el.xpath('./xmlns:i').empty?
639
- font.underline = !font_el.xpath('./xmlns:u').empty?
427
# Extracts the workbook, worksheet, shared strings, styles, comments and
# sheet relationship files from the given zip entries into @tmpdir.
#
# entries - An Enumerable of zip entries (objects responding to name and
#           extract).
#
# Comment and relationship destinations are recorded in @comments_files and
# @rels_files at index (number in the file name - 1).
def process_zipfile_entries(entries)
  # NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames
  #       are not in order. With Numbers 3.1, the first sheet is always
  #       sheet.xml, not sheet1.xml. With Google, the order of the worksheets is
  #       independent of a worksheet's filename (i.e. sheet6.xml can be the
  #       first worksheet).
  #
  #       workbook.xml lists the correct order of worksheets and
  #       workbook.xml.rels lists the filenames for those worksheets.
  #
  # workbook.xml:
  #   <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/>
  #   <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/>
  # workbook.xml.rels:
  #   <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
  #   <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
  sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
  sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
  extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)

  entries.each do |entry|
    # Entry names are matched case-insensitively (downcased) and with the
    # dots escaped, so only real ".xml" / ".rels" suffixes qualify.
    path =
      case entry.name.downcase
      when /sharedstrings\.xml\z/
        "#{@tmpdir}/roo_sharedStrings.xml"
      when /styles\.xml\z/
        "#{@tmpdir}/roo_styles.xml"
      when /comments([0-9]+)\.xml\z/
        # FIXME: Most of the time, the order of the comment files is the same
        #        as the sheet order, i.e. sheet1.xml's comments are in
        #        comments1.xml. In some situations, this isn't true. The true
        #        location of a sheet's comment file is in the sheet1.xml.rels
        #        file. SEE ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
        nr = Regexp.last_match(1).to_i
        @comments_files[nr - 1] = "#{@tmpdir}/roo_comments#{nr}"
      when /sheet([0-9]+)\.xml\.rels\z/
        # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
        #        it also stores the location for sharedStrings, comments,
        #        drawings, etc.
        nr = Regexp.last_match(1).to_i
        @rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}"
      end

    entry.extract(path) if path
  end
end
642
473
 
643
- doc.xpath("//xmlns:cellXfs").each do |xfs|
644
- xfs.children.each do |xf|
645
- @cellXfs << xf['numFmtId']
646
- @style_definitions << fonts[xf['fontId'].to_i]
647
- end
474
# Returns the Styles object built from roo_styles.xml inside @tmpdir,
# creating it on first use and caching it in @styles.
def styles
  @styles ||= begin
    styles_path = File.join(@tmpdir, 'roo_styles.xml')
    Styles.new(styles_path)
  end
end
649
- end
650
477
 
651
- # convert internal excelx attribute to a format
652
- def attribute2format(s)
653
- id = @cellXfs[s.to_i]
654
- @numFmts[id] || Format::STANDARD_FORMATS[id.to_i]
655
- end
478
# Returns the SharedStrings object built from roo_sharedStrings.xml inside
# @tmpdir, creating it on first use and caching it in @shared_strings.
def shared_strings
  @shared_strings ||= begin
    shared_strings_path = File.join(@tmpdir, 'roo_sharedStrings.xml')
    SharedStrings.new(shared_strings_path)
  end
end
656
481
 
657
- def base_date
658
- @base_date ||= read_base_date
659
- end
482
# Returns the Workbook object built from roo_workbook.xml inside @tmpdir,
# creating it on first use and caching it in @workbook.
def workbook
  @workbook ||= begin
    workbook_path = File.join(@tmpdir, 'roo_workbook.xml')
    Workbook.new(workbook_path)
  end
end
660
485
 
661
- # Default to 1900 (minus one day due to excel quirk) but use 1904 if
662
- # it's set in the Workbook's workbookPr
663
- # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
664
- def read_base_date
665
- base_date = Date.new(1899,12,30)
666
- @workbook_doc.xpath("//xmlns:workbookPr").map do |workbookPr|
667
- if workbookPr["date1904"] && workbookPr["date1904"] =~ /true|1/i
668
- base_date = Date.new(1904,01,01)
669
- end
486
# Sends method with args to object, but only when object is truthy and
# reports that it responds to method.
#
# object - the receiver; may be nil.
# method - the method name to invoke.
# args   - arguments forwarded to the method.
#
# Returns the method's result, or nil when the call is skipped.
def safe_send(object, method, *args)
  callable = object && object.respond_to?(method)
  callable ? object.send(method, *args) : nil
end
671
- base_date
672
489
  end
673
-
674
- end # class
490
+ end