roo 1.13.2 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216)
  1. checksums.yaml +4 -4
  2. data/.codeclimate.yml +17 -0
  3. data/.github/ISSUE_TEMPLATE +10 -0
  4. data/.gitignore +11 -0
  5. data/.simplecov +4 -0
  6. data/.travis.yml +17 -0
  7. data/CHANGELOG.md +626 -0
  8. data/Gemfile +17 -12
  9. data/Gemfile_ruby2 +30 -0
  10. data/Guardfile +23 -0
  11. data/LICENSE +3 -1
  12. data/README.md +285 -0
  13. data/Rakefile +23 -23
  14. data/examples/roo_soap_client.rb +28 -31
  15. data/examples/roo_soap_server.rb +4 -6
  16. data/examples/write_me.rb +9 -10
  17. data/lib/roo/base.rb +298 -495
  18. data/lib/roo/constants.rb +5 -0
  19. data/lib/roo/csv.rb +127 -113
  20. data/lib/roo/errors.rb +11 -0
  21. data/lib/roo/excelx/cell/base.rb +94 -0
  22. data/lib/roo/excelx/cell/boolean.rb +27 -0
  23. data/lib/roo/excelx/cell/date.rb +28 -0
  24. data/lib/roo/excelx/cell/datetime.rb +111 -0
  25. data/lib/roo/excelx/cell/empty.rb +19 -0
  26. data/lib/roo/excelx/cell/number.rb +87 -0
  27. data/lib/roo/excelx/cell/string.rb +19 -0
  28. data/lib/roo/excelx/cell/time.rb +43 -0
  29. data/lib/roo/excelx/cell.rb +106 -0
  30. data/lib/roo/excelx/comments.rb +55 -0
  31. data/lib/roo/excelx/coordinate.rb +12 -0
  32. data/lib/roo/excelx/extractor.rb +21 -0
  33. data/lib/roo/excelx/format.rb +64 -0
  34. data/lib/roo/excelx/relationships.rb +25 -0
  35. data/lib/roo/excelx/shared.rb +32 -0
  36. data/lib/roo/excelx/shared_strings.rb +157 -0
  37. data/lib/roo/excelx/sheet.rb +112 -0
  38. data/lib/roo/excelx/sheet_doc.rb +211 -0
  39. data/lib/roo/excelx/styles.rb +64 -0
  40. data/lib/roo/excelx/workbook.rb +59 -0
  41. data/lib/roo/excelx.rb +376 -602
  42. data/lib/roo/font.rb +17 -0
  43. data/lib/roo/formatters/base.rb +15 -0
  44. data/lib/roo/formatters/csv.rb +84 -0
  45. data/lib/roo/formatters/matrix.rb +23 -0
  46. data/lib/roo/formatters/xml.rb +31 -0
  47. data/lib/roo/formatters/yaml.rb +40 -0
  48. data/lib/roo/libre_office.rb +4 -0
  49. data/lib/roo/link.rb +34 -0
  50. data/lib/roo/open_office.rb +626 -0
  51. data/lib/roo/spreadsheet.rb +22 -23
  52. data/lib/roo/tempdir.rb +21 -0
  53. data/lib/roo/utils.rb +78 -0
  54. data/lib/roo/version.rb +3 -0
  55. data/lib/roo.rb +23 -24
  56. data/roo.gemspec +21 -204
  57. data/spec/helpers.rb +5 -0
  58. data/spec/lib/roo/base_spec.rb +229 -3
  59. data/spec/lib/roo/csv_spec.rb +38 -11
  60. data/spec/lib/roo/excelx/format_spec.rb +7 -6
  61. data/spec/lib/roo/excelx_spec.rb +510 -11
  62. data/spec/lib/roo/libreoffice_spec.rb +16 -6
  63. data/spec/lib/roo/openoffice_spec.rb +30 -8
  64. data/spec/lib/roo/spreadsheet_spec.rb +60 -12
  65. data/spec/lib/roo/utils_spec.rb +106 -0
  66. data/spec/spec_helper.rb +7 -6
  67. data/test/all_ss.rb +12 -11
  68. data/test/excelx/cell/test_base.rb +63 -0
  69. data/test/excelx/cell/test_boolean.rb +36 -0
  70. data/test/excelx/cell/test_date.rb +38 -0
  71. data/test/excelx/cell/test_datetime.rb +45 -0
  72. data/test/excelx/cell/test_empty.rb +7 -0
  73. data/test/excelx/cell/test_number.rb +74 -0
  74. data/test/excelx/cell/test_string.rb +28 -0
  75. data/test/excelx/cell/test_time.rb +30 -0
  76. data/test/formatters/test_csv.rb +119 -0
  77. data/test/formatters/test_matrix.rb +76 -0
  78. data/test/formatters/test_xml.rb +74 -0
  79. data/test/formatters/test_yaml.rb +20 -0
  80. data/test/roo/test_csv.rb +52 -0
  81. data/test/roo/test_excelx.rb +186 -0
  82. data/test/roo/test_libre_office.rb +9 -0
  83. data/test/roo/test_open_office.rb +126 -0
  84. data/test/test_helper.rb +73 -53
  85. data/test/test_roo.rb +1211 -2292
  86. metadata +119 -298
  87. data/CHANGELOG +0 -417
  88. data/Gemfile.lock +0 -78
  89. data/README.markdown +0 -126
  90. data/VERSION +0 -1
  91. data/lib/roo/excel.rb +0 -355
  92. data/lib/roo/excel2003xml.rb +0 -300
  93. data/lib/roo/google.rb +0 -292
  94. data/lib/roo/openoffice.rb +0 -496
  95. data/lib/roo/roo_rails_helper.rb +0 -83
  96. data/lib/roo/worksheet.rb +0 -18
  97. data/scripts/txt2html +0 -67
  98. data/spec/lib/roo/excel2003xml_spec.rb +0 -15
  99. data/spec/lib/roo/excel_spec.rb +0 -17
  100. data/spec/lib/roo/google_spec.rb +0 -64
  101. data/test/files/1900_base.xls +0 -0
  102. data/test/files/1900_base.xlsx +0 -0
  103. data/test/files/1904_base.xls +0 -0
  104. data/test/files/1904_base.xlsx +0 -0
  105. data/test/files/Bibelbund.csv +0 -3741
  106. data/test/files/Bibelbund.ods +0 -0
  107. data/test/files/Bibelbund.xls +0 -0
  108. data/test/files/Bibelbund.xlsx +0 -0
  109. data/test/files/Bibelbund.xml +0 -62518
  110. data/test/files/Bibelbund1.ods +0 -0
  111. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  112. data/test/files/bad_excel_date.xls +0 -0
  113. data/test/files/bbu.ods +0 -0
  114. data/test/files/bbu.xls +0 -0
  115. data/test/files/bbu.xlsx +0 -0
  116. data/test/files/bbu.xml +0 -152
  117. data/test/files/bode-v1.ods.zip +0 -0
  118. data/test/files/bode-v1.xls.zip +0 -0
  119. data/test/files/boolean.csv +0 -2
  120. data/test/files/boolean.ods +0 -0
  121. data/test/files/boolean.xls +0 -0
  122. data/test/files/boolean.xlsx +0 -0
  123. data/test/files/boolean.xml +0 -112
  124. data/test/files/borders.ods +0 -0
  125. data/test/files/borders.xls +0 -0
  126. data/test/files/borders.xlsx +0 -0
  127. data/test/files/borders.xml +0 -144
  128. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  129. data/test/files/bug-row-column-fixnum-float.xls +0 -0
  130. data/test/files/bug-row-column-fixnum-float.xml +0 -127
  131. data/test/files/comments.ods +0 -0
  132. data/test/files/comments.xls +0 -0
  133. data/test/files/comments.xlsx +0 -0
  134. data/test/files/csvtypes.csv +0 -1
  135. data/test/files/datetime.ods +0 -0
  136. data/test/files/datetime.xls +0 -0
  137. data/test/files/datetime.xlsx +0 -0
  138. data/test/files/datetime.xml +0 -142
  139. data/test/files/datetime_floatconv.xls +0 -0
  140. data/test/files/datetime_floatconv.xml +0 -148
  141. data/test/files/dreimalvier.ods +0 -0
  142. data/test/files/emptysheets.ods +0 -0
  143. data/test/files/emptysheets.xls +0 -0
  144. data/test/files/emptysheets.xlsx +0 -0
  145. data/test/files/emptysheets.xml +0 -105
  146. data/test/files/excel2003.xml +0 -21140
  147. data/test/files/false_encoding.xls +0 -0
  148. data/test/files/false_encoding.xml +0 -132
  149. data/test/files/file_item_error.xlsx +0 -0
  150. data/test/files/formula.ods +0 -0
  151. data/test/files/formula.xls +0 -0
  152. data/test/files/formula.xlsx +0 -0
  153. data/test/files/formula.xml +0 -134
  154. data/test/files/formula_parse_error.xls +0 -0
  155. data/test/files/formula_parse_error.xml +0 -1833
  156. data/test/files/formula_string_error.xlsx +0 -0
  157. data/test/files/html-escape.ods +0 -0
  158. data/test/files/link.xls +0 -0
  159. data/test/files/link.xlsx +0 -0
  160. data/test/files/matrix.ods +0 -0
  161. data/test/files/matrix.xls +0 -0
  162. data/test/files/named_cells.ods +0 -0
  163. data/test/files/named_cells.xls +0 -0
  164. data/test/files/named_cells.xlsx +0 -0
  165. data/test/files/no_spreadsheet_file.txt +0 -1
  166. data/test/files/numbers1.csv +0 -18
  167. data/test/files/numbers1.ods +0 -0
  168. data/test/files/numbers1.xls +0 -0
  169. data/test/files/numbers1.xlsx +0 -0
  170. data/test/files/numbers1.xml +0 -312
  171. data/test/files/numeric-link.xlsx +0 -0
  172. data/test/files/only_one_sheet.ods +0 -0
  173. data/test/files/only_one_sheet.xls +0 -0
  174. data/test/files/only_one_sheet.xlsx +0 -0
  175. data/test/files/only_one_sheet.xml +0 -67
  176. data/test/files/paragraph.ods +0 -0
  177. data/test/files/paragraph.xls +0 -0
  178. data/test/files/paragraph.xlsx +0 -0
  179. data/test/files/paragraph.xml +0 -127
  180. data/test/files/prova.xls +0 -0
  181. data/test/files/ric.ods +0 -0
  182. data/test/files/simple_spreadsheet.ods +0 -0
  183. data/test/files/simple_spreadsheet.xls +0 -0
  184. data/test/files/simple_spreadsheet.xlsx +0 -0
  185. data/test/files/simple_spreadsheet.xml +0 -225
  186. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  187. data/test/files/simple_spreadsheet_from_italo.xls +0 -0
  188. data/test/files/simple_spreadsheet_from_italo.xml +0 -242
  189. data/test/files/so_datetime.csv +0 -7
  190. data/test/files/style.ods +0 -0
  191. data/test/files/style.xls +0 -0
  192. data/test/files/style.xlsx +0 -0
  193. data/test/files/style.xml +0 -154
  194. data/test/files/time-test.csv +0 -2
  195. data/test/files/time-test.ods +0 -0
  196. data/test/files/time-test.xls +0 -0
  197. data/test/files/time-test.xlsx +0 -0
  198. data/test/files/time-test.xml +0 -131
  199. data/test/files/type_excel.ods +0 -0
  200. data/test/files/type_excel.xlsx +0 -0
  201. data/test/files/type_excelx.ods +0 -0
  202. data/test/files/type_excelx.xls +0 -0
  203. data/test/files/type_openoffice.xls +0 -0
  204. data/test/files/type_openoffice.xlsx +0 -0
  205. data/test/files/whitespace.ods +0 -0
  206. data/test/files/whitespace.xls +0 -0
  207. data/test/files/whitespace.xlsx +0 -0
  208. data/test/files/whitespace.xml +0 -184
  209. data/test/rm_sub_test.rb +0 -12
  210. data/test/rm_test.rb +0 -7
  211. data/test/test_generic_spreadsheet.rb +0 -259
  212. data/website/index.html +0 -385
  213. data/website/index.txt +0 -423
  214. data/website/javascripts/rounded_corners_lite.inc.js +0 -285
  215. data/website/stylesheets/screen.css +0 -130
  216. data/website/template.rhtml +0 -48
data/lib/roo/excelx.rb CHANGED
@@ -1,674 +1,448 @@
1
- require 'date'
2
1
  require 'nokogiri'
3
- require 'spreadsheet'
4
-
5
- class Roo::Excelx < Roo::Base
6
- module Format
7
- EXCEPTIONAL_FORMATS = {
8
- 'h:mm am/pm' => :date,
9
- 'h:mm:ss am/pm' => :date,
10
- }
11
-
12
- STANDARD_FORMATS = {
13
- 0 => 'General',
14
- 1 => '0',
15
- 2 => '0.00',
16
- 3 => '#,##0',
17
- 4 => '#,##0.00',
18
- 9 => '0%',
19
- 10 => '0.00%',
20
- 11 => '0.00E+00',
21
- 12 => '# ?/?',
22
- 13 => '# ??/??',
23
- 14 => 'mm-dd-yy',
24
- 15 => 'd-mmm-yy',
25
- 16 => 'd-mmm',
26
- 17 => 'mmm-yy',
27
- 18 => 'h:mm AM/PM',
28
- 19 => 'h:mm:ss AM/PM',
29
- 20 => 'h:mm',
30
- 21 => 'h:mm:ss',
31
- 22 => 'm/d/yy h:mm',
32
- 37 => '#,##0 ;(#,##0)',
33
- 38 => '#,##0 ;[Red](#,##0)',
34
- 39 => '#,##0.00;(#,##0.00)',
35
- 40 => '#,##0.00;[Red](#,##0.00)',
36
- 45 => 'mm:ss',
37
- 46 => '[h]:mm:ss',
38
- 47 => 'mmss.0',
39
- 48 => '##0.0E+0',
40
- 49 => '@',
41
- }
42
-
43
- def to_type(format)
44
- format = format.to_s.downcase
45
- if type = EXCEPTIONAL_FORMATS[format]
46
- type
47
- elsif format.include?('#')
48
- :float
49
- elsif format.include?('d') || format.include?('y')
50
- if format.include?('h') || format.include?('s')
51
- :datetime
52
- else
53
- :date
2
+ require 'zip/filesystem'
3
+ require 'roo/link'
4
+ require 'roo/tempdir'
5
+ require 'roo/utils'
6
+ require 'forwardable'
7
+ require 'set'
8
+
9
+ module Roo
10
+ class Excelx < Roo::Base
11
+ extend Roo::Tempdir
12
+ extend Forwardable
13
+
14
+ ERROR_VALUES = %w(#N/A #REF! #NAME? #DIV/0! #NULL! #VALUE! #NUM!).to_set
15
+
16
+ require 'roo/excelx/shared'
17
+ require 'roo/excelx/workbook'
18
+ require 'roo/excelx/shared_strings'
19
+ require 'roo/excelx/styles'
20
+ require 'roo/excelx/cell'
21
+ require 'roo/excelx/sheet'
22
+ require 'roo/excelx/relationships'
23
+ require 'roo/excelx/comments'
24
+ require 'roo/excelx/sheet_doc'
25
+ require 'roo/excelx/coordinate'
26
+ require 'roo/excelx/format'
27
+
28
+ delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
29
+ ExceedsMaxError = Class.new(StandardError)
30
+
31
+ # initialization and opening of a spreadsheet file
32
+ # values for packed: :zip
33
+ # optional cell_max (int) parameter for early aborting attempts to parse
34
+ # enormous documents.
35
+ def initialize(filename_or_stream, options = {})
36
+ packed = options[:packed]
37
+ file_warning = options.fetch(:file_warning, :error)
38
+ cell_max = options.delete(:cell_max)
39
+ sheet_options = {}
40
+ sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
41
+ sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false)
42
+
43
+ unless is_stream?(filename_or_stream)
44
+ file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
45
+ basename = find_basename(filename_or_stream)
46
+ end
47
+
48
+ # NOTE: Create temp directory and allow Ruby to cleanup the temp directory
49
+ # when the object is garbage collected. Initially, the finalizer was
50
+ # created in the Roo::Tempdir module, but that led to a segfault
51
+ # when testing in Ruby 2.4.0.
52
+ @tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
53
+ ObjectSpace.define_finalizer(self, self.class.finalize(object_id))
54
+
55
+ @shared = Shared.new(@tmpdir)
56
+ @filename = local_filename(filename_or_stream, @tmpdir, packed)
57
+ process_zipfile(@filename || filename_or_stream)
58
+
59
+ @sheet_names = workbook.sheets.map do |sheet|
60
+ unless options[:only_visible_sheets] && sheet['state'] == 'hidden'
61
+ sheet['name']
54
62
  end
55
- elsif format.include?('h') || format.include?('s')
56
- :time
57
- elsif format.include?('%')
58
- :percentage
63
+ end.compact
64
+ @sheets = []
65
+ @sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n|
66
+ @sheets[n] = Sheet.new(sheet_name, @shared, n, sheet_options)
67
+ [sheet_name, @sheets[n]]
68
+ end]
69
+
70
+ if cell_max
71
+ cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions)
72
+ raise ExceedsMaxError.new("Excel file exceeds cell maximum: #{cell_count} > #{cell_max}") if cell_count > cell_max
73
+ end
74
+
75
+ super
76
+ rescue
77
+ self.class.finalize_tempdirs(object_id)
78
+ raise
79
+ end
80
+
81
+ def method_missing(method, *args)
82
+ if (label = workbook.defined_names[method.to_s])
83
+ safe_send(sheet_for(label.sheet).cells[label.key], :value)
59
84
  else
60
- :float
85
+ # call super for methods like #a1
86
+ super
61
87
  end
62
88
  end
63
89
 
64
- module_function :to_type
65
- end
90
+ def sheets
91
+ @sheet_names
92
+ end
66
93
 
67
- # initialization and opening of a spreadsheet file
68
- # values for packed: :zip
69
- def initialize(filename, options = {}, deprecated_file_warning = :error)
70
- if Hash === options
71
- packed = options[:packed]
72
- file_warning = options[:file_warning] || :error
73
- else
74
- warn 'Supplying `packed` or `file_warning` as separate arguments to `Roo::Excelx.new` is deprecated. Use an options hash instead.'
75
- packed = options
76
- file_warning = deprecated_file_warning
77
- end
78
-
79
- file_type_check(filename,'.xlsx','an Excel-xlsx', file_warning, packed)
80
- make_tmpdir do |tmpdir|
81
- filename = download_uri(filename, tmpdir) if uri?(filename)
82
- filename = unzip(filename, tmpdir) if packed == :zip
83
- @filename = filename
84
- unless File.file?(@filename)
85
- raise IOError, "file #{@filename} does not exist"
86
- end
87
- @comments_files = Array.new
88
- @rels_files = Array.new
89
- extract_content(tmpdir, @filename)
90
- @workbook_doc = load_xml(File.join(tmpdir, "roo_workbook.xml"))
91
- @shared_table = []
92
- if File.exist?(File.join(tmpdir, 'roo_sharedStrings.xml'))
93
- @sharedstring_doc = load_xml(File.join(tmpdir, 'roo_sharedStrings.xml'))
94
- read_shared_strings(@sharedstring_doc)
95
- end
96
- @styles_table = []
97
- @style_definitions = Array.new # TODO: ??? { |h,k| h[k] = {} }
98
- if File.exist?(File.join(tmpdir, 'roo_styles.xml'))
99
- @styles_doc = load_xml(File.join(tmpdir, 'roo_styles.xml'))
100
- read_styles(@styles_doc)
101
- end
102
- @sheet_doc = load_xmls(@sheet_files)
103
- @comments_doc = load_xmls(@comments_files)
104
- @rels_doc = load_xmls(@rels_files)
105
- end
106
- super(filename, options)
107
- @formula = Hash.new
108
- @excelx_type = Hash.new
109
- @excelx_value = Hash.new
110
- @s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt
111
- @comment = Hash.new
112
- @comments_read = Hash.new
113
- @hyperlink = Hash.new
114
- @hyperlinks_read = Hash.new
115
- end
94
+ def sheet_for(sheet)
95
+ sheet ||= default_sheet
96
+ validate_sheet!(sheet)
97
+ @sheets_by_name[sheet]
98
+ end
116
99
 
117
- def method_missing(m,*args)
118
- # is method name a label name
119
- read_labels
120
- if @label.has_key?(m.to_s)
121
- sheet ||= @default_sheet
122
- read_cells(sheet)
123
- row,col = label(m.to_s)
124
- cell(row,col)
125
- else
126
- # call super for methods like #a1
127
- super
100
+ # Returns the content of a spreadsheet-cell.
101
+ # (1,1) is the upper left corner.
102
+ # (1,1), (1,'A'), ('A',1), ('a',1) all refer to the
103
+ # cell at the first row and first column.
104
+ def cell(row, col, sheet = nil)
105
+ key = normalize(row, col)
106
+ safe_send(sheet_for(sheet).cells[key], :value)
128
107
  end
129
- end
130
108
 
131
- # Returns the content of a spreadsheet-cell.
132
- # (1,1) is the upper left corner.
133
- # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
134
- # cell at the first line and first row.
135
- def cell(row, col, sheet=nil)
136
- sheet ||= @default_sheet
137
- read_cells(sheet)
138
- row,col = normalize(row,col)
139
- if celltype(row,col,sheet) == :date
140
- yyyy,mm,dd = @cell[sheet][[row,col]].split('-')
141
- return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
142
- elsif celltype(row,col,sheet) == :datetime
143
- date_part,time_part = @cell[sheet][[row,col]].split(' ')
144
- yyyy,mm,dd = date_part.split('-')
145
- hh,mi,ss = time_part.split(':')
146
- return DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i)
147
- end
148
- @cell[sheet][[row,col]]
149
- end
109
+ def row(rownumber, sheet = nil)
110
+ sheet_for(sheet).row(rownumber)
111
+ end
150
112
 
151
- # Returns the formula at (row,col).
152
- # Returns nil if there is no formula.
153
- # The method #formula? checks if there is a formula.
154
- def formula(row,col,sheet=nil)
155
- sheet ||= @default_sheet
156
- read_cells(sheet)
157
- row,col = normalize(row,col)
158
- @formula[sheet][[row,col]] && @formula[sheet][[row,col]]
159
- end
160
- alias_method :formula?, :formula
161
-
162
- # returns each formula in the selected sheet as an array of elements
163
- # [row, col, formula]
164
- def formulas(sheet=nil)
165
- sheet ||= @default_sheet
166
- read_cells(sheet)
167
- if @formula[sheet]
168
- @formula[sheet].each.collect do |elem|
169
- [elem[0][0], elem[0][1], elem[1]]
113
+ # returns all values in this column as an array
114
+ # column numbers are 1,2,3,... like in the spreadsheet
115
+ def column(column_number, sheet = nil)
116
+ if column_number.is_a?(::String)
117
+ column_number = ::Roo::Utils.letter_to_number(column_number)
170
118
  end
171
- else
172
- []
119
+ sheet_for(sheet).column(column_number)
173
120
  end
174
- end
175
121
 
176
- class Font
177
- attr_accessor :bold, :italic, :underline
122
+ # returns the number of the first non-empty row
123
+ def first_row(sheet = nil)
124
+ sheet_for(sheet).first_row
125
+ end
178
126
 
179
- def bold?
180
- @bold == true
127
+ # returns the number of the last non-empty row
128
+ def last_row(sheet = nil)
129
+ sheet_for(sheet).last_row
181
130
  end
182
131
 
183
- def italic?
184
- @italic == true
132
+ # returns the number of the first non-empty column
133
+ def first_column(sheet = nil)
134
+ sheet_for(sheet).first_column
185
135
  end
186
136
 
187
- def underline?
188
- @underline == true
137
+ # returns the number of the last non-empty column
138
+ def last_column(sheet = nil)
139
+ sheet_for(sheet).last_column
189
140
  end
190
- end
191
141
 
192
- # Given a cell, return the cell's style
193
- def font(row, col, sheet=nil)
194
- sheet ||= @default_sheet
195
- read_cells(sheet)
196
- row,col = normalize(row,col)
197
- s_attribute = @s_attribute[sheet][[row,col]]
198
- s_attribute ||= 0
199
- s_attribute = s_attribute.to_i
200
- @style_definitions[s_attribute]
201
- end
142
+ # set a cell to a certain value
143
+ # (this will not be saved back to the spreadsheet file!)
144
+ def set(row, col, value, sheet = nil) #:nodoc:
145
+ key = normalize(row, col)
146
+ cell_type = cell_type_by_value(value)
147
+ sheet_for(sheet).cells[key] = Cell.new(value, cell_type, nil, cell_type, value, nil, nil, nil, Coordinate.new(row, col))
148
+ end
202
149
 
203
- # returns the type of a cell:
204
- # * :float
205
- # * :string,
206
- # * :date
207
- # * :percentage
208
- # * :formula
209
- # * :time
210
- # * :datetime
211
- def celltype(row,col,sheet=nil)
212
- sheet ||= @default_sheet
213
- read_cells(sheet)
214
- row,col = normalize(row,col)
215
- if @formula[sheet][[row,col]]
216
- return :formula
217
- else
218
- @cell_type[sheet][[row,col]]
150
+ # Returns the formula at (row,col).
151
+ # Returns nil if there is no formula.
152
+ # The method #formula? checks if there is a formula.
153
+ def formula(row, col, sheet = nil)
154
+ key = normalize(row, col)
155
+ safe_send(sheet_for(sheet).cells[key], :formula)
219
156
  end
220
- end
221
157
 
222
- # returns the internal type of an excel cell
223
- # * :numeric_or_formula
224
- # * :string
225
- # Note: this is only available within the Excelx class
226
- def excelx_type(row,col,sheet=nil)
227
- sheet ||= @default_sheet
228
- read_cells(sheet)
229
- row,col = normalize(row,col)
230
- return @excelx_type[sheet][[row,col]]
231
- end
158
+ # Predicate methods really should return a boolean
159
+ # value. Hopefully no one was relying on the fact that this
160
+ # previously returned either nil/formula
161
+ def formula?(*args)
162
+ !!formula(*args)
163
+ end
232
164
 
233
- # returns the internal value of an excelx cell
234
- # Note: this is only available within the Excelx class
235
- def excelx_value(row,col,sheet=nil)
236
- sheet ||= @default_sheet
237
- read_cells(sheet)
238
- row,col = normalize(row,col)
239
- return @excelx_value[sheet][[row,col]]
240
- end
165
+ # returns each formula in the selected sheet as an array of tuples in following format
166
+ # [[row, col, formula], [row, col, formula],...]
167
+ def formulas(sheet = nil)
168
+ sheet_for(sheet).cells.select { |_, cell| cell.formula }.map do |(x, y), cell|
169
+ [x, y, cell.formula]
170
+ end
171
+ end
241
172
 
242
- # returns the internal format of an excel cell
243
- def excelx_format(row,col,sheet=nil)
244
- sheet ||= @default_sheet
245
- read_cells(sheet)
246
- row,col = normalize(row,col)
247
- s = @s_attribute[sheet][[row,col]]
248
- attribute2format(s).to_s
249
- end
173
+ # Given a cell, return the cell's style
174
+ def font(row, col, sheet = nil)
175
+ key = normalize(row, col)
176
+ definition_index = safe_send(sheet_for(sheet).cells[key], :style)
177
+ styles.definitions[definition_index] if definition_index
178
+ end
250
179
 
251
- # returns an array of sheet names in the spreadsheet
252
- def sheets
253
- @workbook_doc.xpath("//xmlns:sheet").map do |sheet|
254
- sheet['name']
180
+ # returns the type of a cell:
181
+ # * :float
182
+ # * :string,
183
+ # * :date
184
+ # * :percentage
185
+ # * :formula
186
+ # * :time
187
+ # * :datetime
188
+ def celltype(row, col, sheet = nil)
189
+ key = normalize(row, col)
190
+ safe_send(sheet_for(sheet).cells[key], :type)
255
191
  end
256
- end
257
192
 
258
- # shows the internal representation of all cells
259
- # for debugging purposes
260
- def to_s(sheet=nil)
261
- sheet ||= @default_sheet
262
- read_cells(sheet)
263
- @cell[sheet].inspect
264
- end
193
+ # returns the internal type of an excel cell
194
+ # * :numeric_or_formula
195
+ # * :string
196
+ # Note: this is only available within the Excelx class
197
+ def excelx_type(row, col, sheet = nil)
198
+ key = normalize(row, col)
199
+ safe_send(sheet_for(sheet).cells[key], :cell_type)
200
+ end
265
201
 
266
- # returns the row,col values of the labelled cell
267
- # (nil,nil) if label is not defined
268
- def label(labelname)
269
- read_labels
270
- if @label.empty? || !@label.has_key?(labelname)
271
- return nil,nil,nil
272
- else
273
- return @label[labelname][1].to_i,
274
- Roo::Base.letter_to_number(@label[labelname][2]),
275
- @label[labelname][0]
202
+ # returns the internal value of an excelx cell
203
+ # Note: this is only available within the Excelx class
204
+ def excelx_value(row, col, sheet = nil)
205
+ key = normalize(row, col)
206
+ safe_send(sheet_for(sheet).cells[key], :cell_value)
276
207
  end
277
- end
278
208
 
279
- # Returns an array which all labels. Each element is an array with
280
- # [labelname, [row,col,sheetname]]
281
- def labels
282
- # sheet ||= @default_sheet
283
- # read_cells(sheet)
284
- read_labels
285
- @label.map do |label|
286
- [ label[0], # name
287
- [ label[1][1].to_i, # row
288
- Roo::Base.letter_to_number(label[1][2]), # column
289
- label[1][0], # sheet
290
- ] ]
209
+ # returns the formatted value of an excelx cell
210
+ # Note: this is only available within the Excelx class
211
+ def formatted_value(row, col, sheet = nil)
212
+ key = normalize(row, col)
213
+ safe_send(sheet_for(sheet).cells[key], :formatted_value)
291
214
  end
292
- end
293
215
 
294
- def hyperlink?(row,col,sheet=nil)
295
- hyperlink(row, col, sheet) != nil
296
- end
216
+ # returns the internal format of an excel cell
217
+ def excelx_format(row, col, sheet = nil)
218
+ key = normalize(row, col)
219
+ sheet_for(sheet).excelx_format(key)
220
+ end
297
221
 
298
- # returns the hyperlink at (row/col)
299
- # nil if there is no hyperlink
300
- def hyperlink(row,col,sheet=nil)
301
- sheet ||= @default_sheet
302
- read_hyperlinks(sheet) unless @hyperlinks_read[sheet]
303
- row,col = normalize(row,col)
304
- return nil unless @hyperlink[sheet]
305
- @hyperlink[sheet][[row,col]]
306
- end
222
+ def empty?(row, col, sheet = nil)
223
+ sheet = sheet_for(sheet)
224
+ key = normalize(row, col)
225
+ cell = sheet.cells[key]
226
+ !cell || cell.empty? ||
227
+ (row < sheet.first_row || row > sheet.last_row || col < sheet.first_column || col > sheet.last_column)
228
+ end
307
229
 
308
- # returns the comment at (row/col)
309
- # nil if there is no comment
310
- def comment(row,col,sheet=nil)
311
- sheet ||= @default_sheet
312
- #read_cells(sheet)
313
- read_comments(sheet) unless @comments_read[sheet]
314
- row,col = normalize(row,col)
315
- return nil unless @comment[sheet]
316
- @comment[sheet][[row,col]]
317
- end
230
+ # shows the internal representation of all cells
231
+ # for debugging purposes
232
+ def to_s(sheet = nil)
233
+ sheet_for(sheet).cells.inspect
234
+ end
318
235
 
319
- # true, if there is a comment
320
- def comment?(row,col,sheet=nil)
321
- sheet ||= @default_sheet
322
- # read_cells(sheet)
323
- read_comments(sheet) unless @comments_read[sheet]
324
- row,col = normalize(row,col)
325
- comment(row,col) != nil
326
- end
236
+ # returns the row,col values of the labelled cell
237
+ # (nil,nil) if label is not defined
238
+ def label(name)
239
+ labels = workbook.defined_names
240
+ return [nil, nil, nil] if labels.empty? || !labels.key?(name)
327
241
 
328
- # returns each comment in the selected sheet as an array of elements
329
- # [row, col, comment]
330
- def comments(sheet=nil)
331
- sheet ||= @default_sheet
332
- read_comments(sheet) unless @comments_read[sheet]
333
- if @comment[sheet]
334
- @comment[sheet].each.collect do |elem|
335
- [elem[0][0],elem[0][1],elem[1]]
336
- end
337
- else
338
- []
242
+ [labels[name].row, labels[name].col, labels[name].sheet]
339
243
  end
340
- end
341
244
 
342
- private
245
+ # Returns an array with all labels. Each element is an array with
246
+ # [labelname, [row,col,sheetname]]
247
+ def labels
248
+ @labels ||= workbook.defined_names.map do |name, label|
249
+ [
250
+ name,
251
+ [label.row, label.col, label.sheet]
252
+ ]
253
+ end
254
+ end
343
255
 
344
- def load_xmls(paths)
345
- paths.compact.map do |item|
346
- load_xml(item)
256
+ def hyperlink?(row, col, sheet = nil)
257
+ !!hyperlink(row, col, sheet)
347
258
  end
348
- end
349
259
 
350
- # helper function to set the internal representation of cells
351
- def set_cell_values(sheet,x,y,i,v,value_type,formula,
352
- excelx_type=nil,
353
- excelx_value=nil,
354
- s_attribute=nil)
355
- key = [y,x+i]
356
- @cell_type[sheet] ||= {}
357
- @cell_type[sheet][key] = value_type
358
- @formula[sheet] ||= {}
359
- @formula[sheet][key] = formula if formula
360
- @cell[sheet] ||= {}
361
- @cell[sheet][key] =
362
- case @cell_type[sheet][key]
363
- when :float
364
- v.to_f
365
- when :string
366
- v
367
- when :date
368
- (base_date+v.to_i).strftime("%Y-%m-%d")
369
- when :datetime
370
- (base_date+v.to_f).strftime("%Y-%m-%d %H:%M:%S")
371
- when :percentage
372
- v.to_f
373
- when :time
374
- v.to_f*(24*60*60)
375
- else
376
- v
377
- end
260
+ # returns the hyperlink at (row/col)
261
+ # nil if there is no hyperlink
262
+ def hyperlink(row, col, sheet = nil)
263
+ key = normalize(row, col)
264
+ sheet_for(sheet).hyperlinks[key]
265
+ end
378
266
 
379
- @cell[sheet][key] = Spreadsheet::Link.new(@hyperlink[sheet][key], @cell[sheet][key].to_s) if hyperlink?(y,x+i)
380
- @excelx_type[sheet] ||= {}
381
- @excelx_type[sheet][key] = excelx_type
382
- @excelx_value[sheet] ||= {}
383
- @excelx_value[sheet][key] = excelx_value
384
- @s_attribute[sheet] ||= {}
385
- @s_attribute[sheet][key] = s_attribute
386
- end
267
+ # returns the comment at (row/col)
268
+ # nil if there is no comment
269
+ def comment(row, col, sheet = nil)
270
+ key = normalize(row, col)
271
+ sheet_for(sheet).comments[key]
272
+ end
387
273
 
388
- # read all cells in the selected sheet
389
- def read_cells(sheet=nil)
390
- sheet ||= @default_sheet
391
- validate_sheet!(sheet)
392
- return if @cells_read[sheet]
393
-
394
- @sheet_doc[sheets.index(sheet)].xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row/xmlns:c").each do |c|
395
- s_attribute = c['s'].to_i # should be here
396
- # c: <c r="A5" s="2">
397
- # <v>22606</v>
398
- # </c>, format: , tmp_type: float
399
- value_type =
400
- case c['t']
401
- when 's'
402
- :shared
403
- when 'b'
404
- :boolean
405
- # 2011-02-25 BEGIN
406
- when 'str'
407
- :string
408
- # 2011-02-25 END
409
- # 2011-09-15 BEGIN
410
- when 'inlineStr'
411
- :inlinestr
412
- # 2011-09-15 END
413
- else
414
- format = attribute2format(s_attribute)
415
- Format.to_type(format)
416
- end
417
- formula = nil
418
- c.children.each do |cell|
419
- case cell.name
420
- when 'is'
421
- cell.children.each do |is|
422
- if is.name == 't'
423
- inlinestr_content = is.content
424
- value_type = :string
425
- v = inlinestr_content
426
- excelx_type = :string
427
- y, x = Roo::Base.split_coordinate(c['r'])
428
- excelx_value = inlinestr_content #cell.content
429
- set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute)
430
- end
431
- end
432
- when 'f'
433
- formula = cell.content
434
- when 'v'
435
- if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0
436
- value_type =
437
- if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
438
- :datetime
439
- else
440
- :date
441
- end
442
- end
443
- excelx_type = [:numeric_or_formula,format.to_s]
444
- excelx_value = cell.content
445
- v =
446
- case value_type
447
- when :shared
448
- value_type = :string
449
- excelx_type = :string
450
- @shared_table[cell.content.to_i]
451
- when :boolean
452
- (cell.content.to_i == 1 ? 'TRUE' : 'FALSE')
453
- when :date
454
- cell.content
455
- when :time
456
- cell.content
457
- when :datetime
458
- cell.content
459
- when :formula
460
- cell.content.to_f #TODO: !!!!
461
- when :string
462
- excelx_type = :string
463
- cell.content
464
- else
465
- value_type = :float
466
- cell.content
467
- end
468
- y, x = Roo::Base.split_coordinate(c['r'])
469
- set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute)
470
- end
471
- end
274
+ # true, if there is a comment
275
+ def comment?(row, col, sheet = nil)
276
+ !!comment(row, col, sheet)
472
277
  end
473
- @cells_read[sheet] = true
474
- # begin comments
475
- =begin
476
- Datei xl/comments1.xml
477
- <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
478
- <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
479
- <authors>
480
- <author />
481
- </authors>
482
- <commentList>
483
- <comment ref="B4" authorId="0">
484
- <text>
485
- <r>
486
- <rPr>
487
- <sz val="10" />
488
- <rFont val="Arial" />
489
- <family val="2" />
490
- </rPr>
491
- <t>Kommentar fuer B4</t>
492
- </r>
493
- </text>
494
- </comment>
495
- <comment ref="B5" authorId="0">
496
- <text>
497
- <r>
498
- <rPr>
499
- <sz val="10" />
500
- <rFont val="Arial" />
501
- <family val="2" />
502
- </rPr>
503
- <t>Kommentar fuer B5</t>
504
- </r>
505
- </text>
506
- </comment>
507
- </commentList>
508
- </comments>
509
- =end
510
- =begin
511
- if @comments_doc[self.sheets.index(sheet)]
512
- read_comments(sheet)
513
- end
514
- =end
515
- #end comments
516
- end
517
278
 
518
- # Reads all comments from a sheet
519
- def read_comments(sheet=nil)
520
- sheet ||= @default_sheet
521
- validate_sheet!(sheet)
522
- n = self.sheets.index(sheet)
523
- return unless @comments_doc[n] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
524
- @comments_doc[n].xpath("//xmlns:comments/xmlns:commentList/xmlns:comment").each do |comment|
525
- ref = comment.attributes['ref'].to_s
526
- row,col = Roo::Base.split_coordinate(ref)
527
- comment.xpath('./xmlns:text/xmlns:r/xmlns:t').each do |text|
528
- @comment[sheet] ||= {}
529
- @comment[sheet][[row,col]] = text.text
279
+ def comments(sheet = nil)
280
+ sheet_for(sheet).comments.map do |(x, y), comment|
281
+ [x, y, comment]
530
282
  end
531
283
  end
532
- @comments_read[sheet] = true
533
- end
534
284
 
535
- # Reads all hyperlinks from a sheet
536
- def read_hyperlinks(sheet=nil)
537
- sheet ||= @default_sheet
538
- validate_sheet!(sheet)
539
- n = self.sheets.index(sheet)
540
- if rels_doc = @rels_doc[n]
541
- rels = Hash[rels_doc.xpath("/xmlns:Relationships/xmlns:Relationship").map do |r|
542
- [r.attribute('Id').text, r]
543
- end]
544
- @sheet_doc[n].xpath("/xmlns:worksheet/xmlns:hyperlinks/xmlns:hyperlink").each do |h|
545
- if rel_element = rels[h.attribute('id').text]
546
- row,col = Roo::Base.split_coordinate(h.attributes['ref'].to_s)
547
- @hyperlink[sheet] ||= {}
548
- @hyperlink[sheet][[row,col]] = rel_element.attribute('Target').text
549
- end
285
+ # Yield an array of Excelx::Cell
286
+ # Takes options for sheet, pad_cells, and max_rows
287
+ def each_row_streaming(options = {})
288
+ sheet = sheet_for(options.delete(:sheet))
289
+ if block_given?
290
+ sheet.each_row(options) { |row| yield row }
291
+ else
292
+ sheet.to_enum(:each_row, options)
550
293
  end
551
294
  end
552
- @hyperlinks_read[sheet] = true
553
- end
554
295
 
555
- def read_labels
556
- @label ||= Hash[@workbook_doc.xpath("//xmlns:definedName").map do |defined_name|
557
- # "Sheet1!$C$5"
558
- sheet, coordinates = defined_name.text.split('!$', 2)
559
- col,row = coordinates.split('$')
560
- [defined_name['name'], [sheet,row,col]]
561
- end]
562
- end
296
+ private
563
297
 
564
- # Extracts all needed files from the zip file
565
- def process_zipfile(tmpdir, zipfilename, zip, path='')
566
- @sheet_files = []
567
- Roo::ZipFile.open(zipfilename) {|zf|
568
- zf.entries.each {|entry|
569
- entry_name = entry.to_s.downcase
298
+ def clean_sheet(sheet)
299
+ @sheets_by_name[sheet].cells.each_pair do |coord, value|
300
+ next unless value.value.is_a?(::String)
570
301
 
571
- path =
572
- if entry_name.end_with?('workbook.xml')
573
- "#{tmpdir}/roo_workbook.xml"
574
- elsif entry_name.end_with?('sharedstrings.xml')
575
- "#{tmpdir}/roo_sharedStrings.xml"
576
- elsif entry_name.end_with?('styles.xml')
577
- "#{tmpdir}/roo_styles.xml"
578
- elsif entry_name =~ /sheet([0-9]+).xml$/
579
- nr = $1
580
- @sheet_files[nr.to_i-1] = "#{tmpdir}/roo_sheet#{nr}"
581
- elsif entry_name =~ /comments([0-9]+).xml$/
582
- nr = $1
583
- @comments_files[nr.to_i-1] = "#{tmpdir}/roo_comments#{nr}"
584
- elsif entry_name =~ /sheet([0-9]+).xml.rels$/
585
- nr = $1
586
- @rels_files[nr.to_i-1] = "#{tmpdir}/roo_rels#{nr}"
587
- end
588
- if path
589
- extract_file(zip, entry, path)
590
- end
591
- }
592
- }
593
- end
302
+ @sheets_by_name[sheet].cells[coord].value = sanitize_value(value.value)
303
+ end
594
304
 
595
- def extract_file(source_zip, entry, destination_path)
596
- open(destination_path,'wb') {|f|
597
- f << source_zip.read(entry)
598
- }
599
- end
305
+ @cleaned[sheet] = true
306
+ end
600
307
 
601
- # extract files from the zip file
602
- def extract_content(tmpdir, zipfilename)
603
- Roo::ZipFile.open(@filename) do |zip|
604
- process_zipfile(tmpdir, zipfilename,zip)
308
+ # Internal: extracts the worksheet_ids from the workbook.xml file. xlsx
309
+ # documents require a workbook.xml file, so if the file is missing
310
+ # it is not a valid xlsx file. In these cases, an ArgumentError is
311
+ # raised.
312
+ #
313
+ # entries - the Zip::Entry objects of the archive; the workbook.xml entry is looked up among them.
314
+ # path - A String for Zip::Entry's destination path.
315
+ #
316
+ # Examples
317
+ #
318
+ # extract_worksheet_ids([<Zip::Entry>, ...], 'tmpdir/roo_workbook.xml')
319
+ # # => ["rId1", "rId2", "rId3"]
320
+ #
321
+ # Returns an Array of Strings.
322
+ def extract_worksheet_ids(entries, path)
323
+ wb = entries.find { |e| e.name[/workbook.xml$/] }
324
+ fail ArgumentError 'missing required workbook file' if wb.nil?
325
+
326
+ wb.extract(path)
327
+ workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
328
+ workbook_doc.xpath('//sheet').map { |s| s.attributes['id'].value }
605
329
  end
606
- end
607
330
 
608
- # read the shared strings xml document
609
- def read_shared_strings(doc)
610
- doc.xpath("/xmlns:sst/xmlns:si").each do |si|
611
- shared_table_entry = ''
612
- si.children.each do |elem|
613
- if elem.name == 'r' and elem.children
614
- elem.children.each do |r_elem|
615
- if r_elem.name == 't'
616
- shared_table_entry << r_elem.content
617
- end
618
- end
619
- end
620
- if elem.name == 't'
621
- shared_table_entry = elem.content
622
- end
331
+ # Internal
332
+ #
333
+ # entries - the Zip::Entry objects of the archive; the workbook.xml.rels entry is looked up among them.
334
+ # path - A String for the Zip::Entry's destination path.
335
+ #
336
+ # Examples
337
+ #
338
+ # extract_worksheet_rels([<Zip::Entry>, ...], 'tmpdir/roo_workbook.xml.rels')
339
+ # # => {
340
+ # "rId1"=>"worksheets/sheet1.xml",
341
+ # "rId2"=>"worksheets/sheet2.xml",
342
+ # "rId3"=>"worksheets/sheet3.xml"
343
+ # }
344
+ #
345
+ # Returns a Hash.
346
+ def extract_worksheet_rels(entries, path)
347
+ wb_rels = entries.find { |e| e.name[/workbook.xml.rels$/] }
348
+ fail ArgumentError 'missing required workbook file' if wb_rels.nil?
349
+
350
+ wb_rels.extract(path)
351
+ rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
352
+ worksheet_type = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'
353
+
354
+ relationships = rels_doc.xpath('//Relationship').select do |relationship|
355
+ relationship.attributes['Type'].value == worksheet_type
623
356
  end
624
- @shared_table << shared_table_entry
625
- end
626
- end
627
357
 
628
- # read the styles elements of an excelx document
629
- def read_styles(doc)
630
- @cellXfs = []
631
-
632
- @numFmts = Hash[doc.xpath("//xmlns:numFmt").map do |numFmt|
633
- [numFmt['numFmtId'], numFmt['formatCode']]
634
- end]
635
- fonts = doc.xpath("//xmlns:fonts/xmlns:font").map do |font_el|
636
- Font.new.tap do |font|
637
- font.bold = !font_el.xpath('./xmlns:b').empty?
638
- font.italic = !font_el.xpath('./xmlns:i').empty?
639
- font.underline = !font_el.xpath('./xmlns:u').empty?
358
+ relationships.inject({}) do |hash, relationship|
359
+ attributes = relationship.attributes
360
+ id = attributes['Id']
361
+ hash[id.value] = attributes['Target'].value
362
+ hash
640
363
  end
641
364
  end
642
365
 
643
- doc.xpath("//xmlns:cellXfs").each do |xfs|
644
- xfs.children.each do |xf|
645
- @cellXfs << xf['numFmtId']
646
- @style_definitions << fonts[xf['fontId'].to_i]
366
+ # Extracts the sheets in order, but it will ignore sheets that are not
367
+ # worksheets.
368
+ def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
369
+ (sheet_ids & sheets.keys).each_with_index do |id, i|
370
+ name = sheets[id]
371
+ entry = entries.find { |e| "/#{e.name}" =~ /#{name}$/ }
372
+ path = "#{tmpdir}/roo_sheet#{i + 1}"
373
+ sheet_files << path
374
+ @sheet_files << path
375
+ entry.extract(path)
647
376
  end
648
377
  end
649
- end
650
378
 
651
- # convert internal excelx attribute to a format
652
- def attribute2format(s)
653
- id = @cellXfs[s.to_i]
654
- @numFmts[id] || Format::STANDARD_FORMATS[id.to_i]
655
- end
379
+ # Extracts all needed files from the zip file
380
+ def process_zipfile(zipfilename_or_stream)
381
+ @sheet_files = []
656
382
 
657
- def base_date
658
- @base_date ||= read_base_date
659
- end
383
+ unless is_stream?(zipfilename_or_stream)
384
+ zip_file = Zip::File.open(zipfilename_or_stream)
385
+ else
386
+ zip_file = Zip::CentralDirectory.new
387
+ zip_file.read_from_stream zipfilename_or_stream
388
+ end
660
389
 
661
- # Default to 1900 (minus one day due to excel quirk) but use 1904 if
662
- # it's set in the Workbook's workbookPr
663
- # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
664
- def read_base_date
665
- base_date = Date.new(1899,12,30)
666
- @workbook_doc.xpath("//xmlns:workbookPr").map do |workbookPr|
667
- if workbookPr["date1904"] && workbookPr["date1904"] =~ /true|1/i
668
- base_date = Date.new(1904,01,01)
390
+ process_zipfile_entries zip_file.to_a.sort_by(&:name)
391
+ end
392
+
393
+ def process_zipfile_entries(entries)
394
+ # NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames
395
+ # are not in order. With Numbers 3.1, the first sheet is always
396
+ # sheet.xml, not sheet1.xml. With Google, the order of the worksheets is
397
+ # independent of a worksheet's filename (i.e. sheet6.xml can be the
398
+ # first worksheet).
399
+ #
400
+ # workbook.xml lists the correct order of worksheets and
401
+ # workbook.xml.rels lists the filenames for those worksheets.
402
+ #
403
+ # workbook.xml:
404
+ # <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/>
405
+ # <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/>
406
+ # workbook.xml.rels:
407
+ # <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
408
+ # <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
409
+ sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
410
+ sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
411
+ extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)
412
+
413
+ entries.each do |entry|
414
+ path =
415
+ case entry.name.downcase
416
+ when /sharedstrings.xml$/
417
+ "#{@tmpdir}/roo_sharedStrings.xml"
418
+ when /styles.xml$/
419
+ "#{@tmpdir}/roo_styles.xml"
420
+ when /comments([0-9]+).xml$/
421
+ # FIXME: Most of the time, the order of the comment files is the same as
422
+ # the sheet order, i.e. sheet1.xml's comments are in comments1.xml.
423
+ # In some situations, this isn't true. The true location of a
424
+ # sheet's comment file is in the sheet1.xml.rels file. SEE
425
+ # ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
426
+ nr = Regexp.last_match[1].to_i
427
+ comments_files[nr - 1] = "#{@tmpdir}/roo_comments#{nr}"
428
+ when %r{chartsheets/_rels/sheet([0-9]+).xml.rels$}
429
+ # NOTE: Chart sheet relationship files were interfering with
430
+ # worksheets.
431
+ nil
432
+ when /sheet([0-9]+).xml.rels$/
433
+ # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
434
+ # it also stores the location for sharedStrings, comments,
435
+ # drawings, etc.
436
+ nr = Regexp.last_match[1].to_i
437
+ rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}"
438
+ end
439
+
440
+ entry.extract(path) if path
669
441
  end
670
442
  end
671
- base_date
672
- end
673
443
 
674
- end # class
444
+ def safe_send(object, method, *args)
445
+ object.send(method, *args) if object && object.respond_to?(method)
446
+ end
447
+ end
448
+ end