roo 1.13.2 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/.codeclimate.yml +17 -0
  3. data/.github/ISSUE_TEMPLATE +10 -0
  4. data/.gitignore +11 -0
  5. data/.simplecov +4 -0
  6. data/.travis.yml +17 -0
  7. data/CHANGELOG.md +626 -0
  8. data/Gemfile +17 -12
  9. data/Gemfile_ruby2 +30 -0
  10. data/Guardfile +23 -0
  11. data/LICENSE +3 -1
  12. data/README.md +285 -0
  13. data/Rakefile +23 -23
  14. data/examples/roo_soap_client.rb +28 -31
  15. data/examples/roo_soap_server.rb +4 -6
  16. data/examples/write_me.rb +9 -10
  17. data/lib/roo/base.rb +298 -495
  18. data/lib/roo/constants.rb +5 -0
  19. data/lib/roo/csv.rb +127 -113
  20. data/lib/roo/errors.rb +11 -0
  21. data/lib/roo/excelx/cell/base.rb +94 -0
  22. data/lib/roo/excelx/cell/boolean.rb +27 -0
  23. data/lib/roo/excelx/cell/date.rb +28 -0
  24. data/lib/roo/excelx/cell/datetime.rb +111 -0
  25. data/lib/roo/excelx/cell/empty.rb +19 -0
  26. data/lib/roo/excelx/cell/number.rb +87 -0
  27. data/lib/roo/excelx/cell/string.rb +19 -0
  28. data/lib/roo/excelx/cell/time.rb +43 -0
  29. data/lib/roo/excelx/cell.rb +106 -0
  30. data/lib/roo/excelx/comments.rb +55 -0
  31. data/lib/roo/excelx/coordinate.rb +12 -0
  32. data/lib/roo/excelx/extractor.rb +21 -0
  33. data/lib/roo/excelx/format.rb +64 -0
  34. data/lib/roo/excelx/relationships.rb +25 -0
  35. data/lib/roo/excelx/shared.rb +32 -0
  36. data/lib/roo/excelx/shared_strings.rb +157 -0
  37. data/lib/roo/excelx/sheet.rb +112 -0
  38. data/lib/roo/excelx/sheet_doc.rb +211 -0
  39. data/lib/roo/excelx/styles.rb +64 -0
  40. data/lib/roo/excelx/workbook.rb +59 -0
  41. data/lib/roo/excelx.rb +376 -602
  42. data/lib/roo/font.rb +17 -0
  43. data/lib/roo/formatters/base.rb +15 -0
  44. data/lib/roo/formatters/csv.rb +84 -0
  45. data/lib/roo/formatters/matrix.rb +23 -0
  46. data/lib/roo/formatters/xml.rb +31 -0
  47. data/lib/roo/formatters/yaml.rb +40 -0
  48. data/lib/roo/libre_office.rb +4 -0
  49. data/lib/roo/link.rb +34 -0
  50. data/lib/roo/open_office.rb +626 -0
  51. data/lib/roo/spreadsheet.rb +22 -23
  52. data/lib/roo/tempdir.rb +21 -0
  53. data/lib/roo/utils.rb +78 -0
  54. data/lib/roo/version.rb +3 -0
  55. data/lib/roo.rb +23 -24
  56. data/roo.gemspec +21 -204
  57. data/spec/helpers.rb +5 -0
  58. data/spec/lib/roo/base_spec.rb +229 -3
  59. data/spec/lib/roo/csv_spec.rb +38 -11
  60. data/spec/lib/roo/excelx/format_spec.rb +7 -6
  61. data/spec/lib/roo/excelx_spec.rb +510 -11
  62. data/spec/lib/roo/libreoffice_spec.rb +16 -6
  63. data/spec/lib/roo/openoffice_spec.rb +30 -8
  64. data/spec/lib/roo/spreadsheet_spec.rb +60 -12
  65. data/spec/lib/roo/utils_spec.rb +106 -0
  66. data/spec/spec_helper.rb +7 -6
  67. data/test/all_ss.rb +12 -11
  68. data/test/excelx/cell/test_base.rb +63 -0
  69. data/test/excelx/cell/test_boolean.rb +36 -0
  70. data/test/excelx/cell/test_date.rb +38 -0
  71. data/test/excelx/cell/test_datetime.rb +45 -0
  72. data/test/excelx/cell/test_empty.rb +7 -0
  73. data/test/excelx/cell/test_number.rb +74 -0
  74. data/test/excelx/cell/test_string.rb +28 -0
  75. data/test/excelx/cell/test_time.rb +30 -0
  76. data/test/formatters/test_csv.rb +119 -0
  77. data/test/formatters/test_matrix.rb +76 -0
  78. data/test/formatters/test_xml.rb +74 -0
  79. data/test/formatters/test_yaml.rb +20 -0
  80. data/test/roo/test_csv.rb +52 -0
  81. data/test/roo/test_excelx.rb +186 -0
  82. data/test/roo/test_libre_office.rb +9 -0
  83. data/test/roo/test_open_office.rb +126 -0
  84. data/test/test_helper.rb +73 -53
  85. data/test/test_roo.rb +1211 -2292
  86. metadata +119 -298
  87. data/CHANGELOG +0 -417
  88. data/Gemfile.lock +0 -78
  89. data/README.markdown +0 -126
  90. data/VERSION +0 -1
  91. data/lib/roo/excel.rb +0 -355
  92. data/lib/roo/excel2003xml.rb +0 -300
  93. data/lib/roo/google.rb +0 -292
  94. data/lib/roo/openoffice.rb +0 -496
  95. data/lib/roo/roo_rails_helper.rb +0 -83
  96. data/lib/roo/worksheet.rb +0 -18
  97. data/scripts/txt2html +0 -67
  98. data/spec/lib/roo/excel2003xml_spec.rb +0 -15
  99. data/spec/lib/roo/excel_spec.rb +0 -17
  100. data/spec/lib/roo/google_spec.rb +0 -64
  101. data/test/files/1900_base.xls +0 -0
  102. data/test/files/1900_base.xlsx +0 -0
  103. data/test/files/1904_base.xls +0 -0
  104. data/test/files/1904_base.xlsx +0 -0
  105. data/test/files/Bibelbund.csv +0 -3741
  106. data/test/files/Bibelbund.ods +0 -0
  107. data/test/files/Bibelbund.xls +0 -0
  108. data/test/files/Bibelbund.xlsx +0 -0
  109. data/test/files/Bibelbund.xml +0 -62518
  110. data/test/files/Bibelbund1.ods +0 -0
  111. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  112. data/test/files/bad_excel_date.xls +0 -0
  113. data/test/files/bbu.ods +0 -0
  114. data/test/files/bbu.xls +0 -0
  115. data/test/files/bbu.xlsx +0 -0
  116. data/test/files/bbu.xml +0 -152
  117. data/test/files/bode-v1.ods.zip +0 -0
  118. data/test/files/bode-v1.xls.zip +0 -0
  119. data/test/files/boolean.csv +0 -2
  120. data/test/files/boolean.ods +0 -0
  121. data/test/files/boolean.xls +0 -0
  122. data/test/files/boolean.xlsx +0 -0
  123. data/test/files/boolean.xml +0 -112
  124. data/test/files/borders.ods +0 -0
  125. data/test/files/borders.xls +0 -0
  126. data/test/files/borders.xlsx +0 -0
  127. data/test/files/borders.xml +0 -144
  128. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  129. data/test/files/bug-row-column-fixnum-float.xls +0 -0
  130. data/test/files/bug-row-column-fixnum-float.xml +0 -127
  131. data/test/files/comments.ods +0 -0
  132. data/test/files/comments.xls +0 -0
  133. data/test/files/comments.xlsx +0 -0
  134. data/test/files/csvtypes.csv +0 -1
  135. data/test/files/datetime.ods +0 -0
  136. data/test/files/datetime.xls +0 -0
  137. data/test/files/datetime.xlsx +0 -0
  138. data/test/files/datetime.xml +0 -142
  139. data/test/files/datetime_floatconv.xls +0 -0
  140. data/test/files/datetime_floatconv.xml +0 -148
  141. data/test/files/dreimalvier.ods +0 -0
  142. data/test/files/emptysheets.ods +0 -0
  143. data/test/files/emptysheets.xls +0 -0
  144. data/test/files/emptysheets.xlsx +0 -0
  145. data/test/files/emptysheets.xml +0 -105
  146. data/test/files/excel2003.xml +0 -21140
  147. data/test/files/false_encoding.xls +0 -0
  148. data/test/files/false_encoding.xml +0 -132
  149. data/test/files/file_item_error.xlsx +0 -0
  150. data/test/files/formula.ods +0 -0
  151. data/test/files/formula.xls +0 -0
  152. data/test/files/formula.xlsx +0 -0
  153. data/test/files/formula.xml +0 -134
  154. data/test/files/formula_parse_error.xls +0 -0
  155. data/test/files/formula_parse_error.xml +0 -1833
  156. data/test/files/formula_string_error.xlsx +0 -0
  157. data/test/files/html-escape.ods +0 -0
  158. data/test/files/link.xls +0 -0
  159. data/test/files/link.xlsx +0 -0
  160. data/test/files/matrix.ods +0 -0
  161. data/test/files/matrix.xls +0 -0
  162. data/test/files/named_cells.ods +0 -0
  163. data/test/files/named_cells.xls +0 -0
  164. data/test/files/named_cells.xlsx +0 -0
  165. data/test/files/no_spreadsheet_file.txt +0 -1
  166. data/test/files/numbers1.csv +0 -18
  167. data/test/files/numbers1.ods +0 -0
  168. data/test/files/numbers1.xls +0 -0
  169. data/test/files/numbers1.xlsx +0 -0
  170. data/test/files/numbers1.xml +0 -312
  171. data/test/files/numeric-link.xlsx +0 -0
  172. data/test/files/only_one_sheet.ods +0 -0
  173. data/test/files/only_one_sheet.xls +0 -0
  174. data/test/files/only_one_sheet.xlsx +0 -0
  175. data/test/files/only_one_sheet.xml +0 -67
  176. data/test/files/paragraph.ods +0 -0
  177. data/test/files/paragraph.xls +0 -0
  178. data/test/files/paragraph.xlsx +0 -0
  179. data/test/files/paragraph.xml +0 -127
  180. data/test/files/prova.xls +0 -0
  181. data/test/files/ric.ods +0 -0
  182. data/test/files/simple_spreadsheet.ods +0 -0
  183. data/test/files/simple_spreadsheet.xls +0 -0
  184. data/test/files/simple_spreadsheet.xlsx +0 -0
  185. data/test/files/simple_spreadsheet.xml +0 -225
  186. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  187. data/test/files/simple_spreadsheet_from_italo.xls +0 -0
  188. data/test/files/simple_spreadsheet_from_italo.xml +0 -242
  189. data/test/files/so_datetime.csv +0 -7
  190. data/test/files/style.ods +0 -0
  191. data/test/files/style.xls +0 -0
  192. data/test/files/style.xlsx +0 -0
  193. data/test/files/style.xml +0 -154
  194. data/test/files/time-test.csv +0 -2
  195. data/test/files/time-test.ods +0 -0
  196. data/test/files/time-test.xls +0 -0
  197. data/test/files/time-test.xlsx +0 -0
  198. data/test/files/time-test.xml +0 -131
  199. data/test/files/type_excel.ods +0 -0
  200. data/test/files/type_excel.xlsx +0 -0
  201. data/test/files/type_excelx.ods +0 -0
  202. data/test/files/type_excelx.xls +0 -0
  203. data/test/files/type_openoffice.xls +0 -0
  204. data/test/files/type_openoffice.xlsx +0 -0
  205. data/test/files/whitespace.ods +0 -0
  206. data/test/files/whitespace.xls +0 -0
  207. data/test/files/whitespace.xlsx +0 -0
  208. data/test/files/whitespace.xml +0 -184
  209. data/test/rm_sub_test.rb +0 -12
  210. data/test/rm_test.rb +0 -7
  211. data/test/test_generic_spreadsheet.rb +0 -259
  212. data/website/index.html +0 -385
  213. data/website/index.txt +0 -423
  214. data/website/javascripts/rounded_corners_lite.inc.js +0 -285
  215. data/website/stylesheets/screen.css +0 -130
  216. data/website/template.rhtml +0 -48
@@ -0,0 +1,626 @@
1
+ require 'date'
2
+ require 'nokogiri'
3
+ require 'cgi'
4
+ require 'zip/filesystem'
5
+ require 'roo/font'
6
+ require 'roo/tempdir'
7
+ require 'base64'
8
+ require 'openssl'
9
+
10
+ module Roo
11
+ class OpenOffice < Roo::Base
12
+ extend Roo::Tempdir
13
+
14
+ ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'.freeze
15
+ XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']".freeze
16
+ XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']".freeze
17
+
18
# Opens an OpenDocument spreadsheet (.ods) and prepares it for reading.
#
# filename - the spreadsheet location; passed to file_type_check,
#            find_basename and local_filename.
# options  - Hash. Keys read here:
#            :packed              - forwarded to file_type_check and
#                                   local_filename (values for packed: :zip)
#            :file_warning        - defaults to :error when absent
#            :only_visible_sheets - when truthy, sheets whose table style is
#                                   recorded as not displayed are omitted
#                                   from the sheet list
#            :tmpdir_root         - forwarded to make_tempdir
#            The whole hash is also handed to open_oo_file and to super.
#
# Any StandardError raised during setup triggers removal of the temp
# directories registered for this object, then the error is re-raised.
def initialize(filename, options = {})
  packed = options[:packed]
  file_warning = options[:file_warning] || :error

  @only_visible_sheets = options[:only_visible_sheets]
  file_type_check(filename, '.ods', 'an Roo::OpenOffice', file_warning, packed)
  # NOTE: Create temp directory and allow Ruby to cleanup the temp directory
  #       when the object is garbage collected. Initially, the finalizer was
  #       created in the Roo::Tempdir module, but that led to a segfault
  #       when testing in Ruby 2.4.0.
  @tmpdir = self.class.make_tempdir(self, find_basename(filename), options[:tmpdir_root])
  ObjectSpace.define_finalizer(self, self.class.finalize(object_id))
  @filename = local_filename(filename, @tmpdir, packed)
  # TODO: @cells_read[:default] = false
  # content.xml must be extracted (and decrypted) before anything calls #doc.
  open_oo_file(options)
  super(filename, options)
  initialize_default_variables

  # Collect per-table display flags from the automatic styles.
  unless @table_display.any?
    doc.xpath(XPATH_FIND_TABLE_STYLES).each do |style|
      read_table_styles(style)
    end
  end

  # Sheet names in document order; hidden sheets map to nil and are dropped
  # by #compact when :only_visible_sheets is set.
  @sheet_names = doc.xpath(XPATH_LOCAL_NAME_TABLE).map do |sheet|
    if !@only_visible_sheets || @table_display[attribute(sheet, 'style-name')]
      sheet.attributes['name'].value
    end
  end.compact
rescue
  # Do not leave the temp directory behind when construction fails.
  self.class.finalize_tempdirs(object_id)
  raise
end
53
+
54
# Extracts content.xml from the archive at @filename into the temp directory
# (as roo_content.xml) and hands it to decrypt_if_necessary.
#
# Raises ArgumentError when the archive contains no content.xml.
def open_oo_file(options)
  Zip::File.open(@filename) do |archive|
    entry = archive.glob('content.xml').first
    raise ArgumentError, ERROR_MISSING_CONTENT_XML unless entry

    target = ::File.join(@tmpdir, 'roo_content.xml')
    entry.extract(target)
    decrypt_if_necessary(archive, entry, target, options)
  end
end
64
+
65
# Resets the per-document caches used while reading cells, styles and
# comments. @style_defaults yields a fresh array per unknown key and
# @table_display treats unknown styles as displayed (true).
def initialize_default_variables
  @style_defaults = Hash.new { |hash, sheet| hash[sheet] = [] }
  @table_display = Hash.new { |hash, style_name| hash[style_name] = true }
  @formula, @style, @font_style_definitions, @comment = {}, {}, {}, {}
  @comments_read = {}
end
74
+
75
# Lets a named cell (label) be read by calling a method of the same name.
# Names that are not labels are passed up the chain (e.g. methods like #a1).
def method_missing(m, *args)
  read_labels
  name = m.to_s
  return super unless @label.key?(name)

  row, col = label(name)
  cell(row, col)
end
86
+
87
# Returns the content of a spreadsheet cell.
# (1,1) is the upper left corner; (1,1), (1,'A'), ('A',1) and ('a',1) all
# address the cell in the first row and first column.
# Cells of type :date are returned as Date objects built from the stored
# "YYYY-MM-DD" text; every other cell is returned as stored.
def cell(row, col, sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  key = [row, col]
  return @cell[sheet][key] unless celltype(row, col, sheet) == :date

  year, month, day = @cell[sheet][key].to_s.split('-')
  Date.new(year.to_i, month.to_i, day.to_i)
end
102
+
103
# Returns the formula stored at (row, col), or nil when the cell has none.
# Use #formula? for a boolean answer.
def formula(row, col, sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  key_row, key_col = normalize(row, col)
  @formula[sheet][[key_row, key_col]]
end
112
+
113
# Boolean form of #formula: true when the addressed cell has a formula,
# false otherwise. Arguments are forwarded to #formula unchanged.
def formula?(*args)
  formula(*args) ? true : false
end
119
+
120
# Lists every formula of the selected sheet as [row, col, formula] triples.
# Returns an empty array when nothing is recorded for the sheet.
def formulas(sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  per_sheet = @formula[sheet]
  return [] unless per_sheet

  per_sheet.map { |position, text| [position[0], position[1], text] }
end
130
+
131
# Looks up the font definition for a cell. The style name is taken from the
# cell itself, then from the column default, and finally falls back to
# 'Default'.
def font(row, col, sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  style_name = @style[sheet][[row, col]]
  style_name ||= @style_defaults[sheet][col - 1]
  style_name ||= 'Default'
  @font_style_definitions[style_name]
end
139
+
140
# Returns the type of a cell:
# * :float
# * :string
# * :date
# * :percentage
# * :formula
# * :time
# * :datetime
# A cell with a recorded formula is always reported as :formula.
def celltype(row, col, sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  key = [row, col]
  return :formula if @formula[sheet][key]

  @cell_type[sheet][key]
end
154
+
155
# Names of the sheets collected when the document was opened.
def sheets
  @sheet_names
end
158
+
159
# Version of the Roo::OpenOffice document, as read from the
# document-content element by oo_version (at 2007 this was always "1.0").
def officeversion
  oo_version
  @officeversion
end
165
+
166
# Dumps the internal cell table of a sheet via #inspect.
# Mainly useful for debugging.
def to_s(sheet = nil)
  target = sheet || default_sheet
  read_cells(target)
  @cell[target].inspect
end
173
+
174
# Returns [row, col, sheetname] for the labelled cell, or [nil, nil, nil]
# when no label with that name is defined.
def label(labelname)
  read_labels
  return [nil, nil, nil] unless @label.key?(labelname)

  sheet_name, row, col = @label[labelname]
  [row.to_i, ::Roo::Utils.letter_to_number(col), sheet_name]
end
185
+
186
# Returns an array with all labels. Each element has the form
# [labelname, [row, col, sheetname]].
def labels(_sheet = nil)
  read_labels
  @label.map do |name, (sheet_name, row, col)|
    [name, [row.to_i, ::Roo::Utils.letter_to_number(col), sheet_name]]
  end
end
198
+
199
# Returns the comment attached to (row, col), or nil if there is none.
def comment(row, col, sheet = nil)
  sheet ||= default_sheet
  read_cells(sheet)
  row, col = normalize(row, col)
  sheet_comments = @comment[sheet]
  sheet_comments ? sheet_comments[[row, col]] : nil
end
208
+
209
# Lists every comment of the selected sheet as [row, col, comment] triples.
# Comments are loaded on first use; returns [] when the sheet has none.
def comments(sheet = nil)
  sheet ||= default_sheet
  read_comments(sheet) unless @comments_read[sheet]
  sheet_comments = @comment[sheet]
  return [] unless sheet_comments

  sheet_comments.map { |position, text| [position[0], position[1], text] }
end
219
+
220
+ private
221
+
222
# If the manifest carries an encryption-data element for content.xml, try
# to decrypt it; on success the temporary content.xml is overwritten with
# the decrypted contents.
#
# Raises ArgumentError when META-INF/manifest.xml is missing, or when the
# content is encrypted but options[:password] is nil.
def decrypt_if_necessary(
  zip_file,
  content_entry,
  roo_content_xml_path, options
)
  manifest_entry = zip_file.glob('META-INF/manifest.xml').first
  fail ArgumentError, 'file missing required META-INF/manifest.xml' unless manifest_entry

  roo_manifest_xml_path = File.join(@tmpdir, 'roo_manifest.xml')
  manifest_entry.extract(roo_manifest_xml_path)
  manifest = ::Roo::Utils.load_xml(roo_manifest_xml_path)

  # Only the encryption-data that belongs to content.xml is of interest.
  encryption_data = manifest.xpath(
    "//manifest:file-entry[@manifest:full-path='content.xml']/manifest:encryption-data"
  ).first
  return if encryption_data.nil? # not encrypted, nothing to do

  password = options[:password]
  fail ArgumentError, 'file is encrypted but password was not supplied' if password.nil?

  perform_decryption(encryption_data, password, content_entry, roo_content_xml_path)
end
269
+
270
# Reads the encryption parameters from the manifest's encryption-data node,
# decrypts the content entry and overwrites roo_content_xml_path with the
# inflated plaintext.
#
# Raises ArgumentError when required manifest elements are missing, when the
# start-key-generation algorithm is not SHA-256, or when decrypting,
# inflating or writing fails (bad password or corrupt data).
def perform_decryption(
  encryption_data,
  password,
  content_entry,
  roo_content_xml_path
)
  algorithm_node = encryption_data.xpath('manifest:algorithm').first
  key_derivation_node = encryption_data.xpath('manifest:key-derivation').first
  start_key_generation_node = encryption_data.xpath('manifest:start-key-generation').first

  if algorithm_node.nil? || key_derivation_node.nil? || start_key_generation_node.nil?
    fail ArgumentError, 'manifest.xml missing encryption-data elements'
  end

  # The algorithm is a URI; the initialization vector and salt are base-64.
  algorithm = algorithm_node['manifest:algorithm-name']
  iv = Base64.decode64(algorithm_node['manifest:initialisation-vector'])
  key_derivation_name = key_derivation_node['manifest:key-derivation-name']
  iteration_count = key_derivation_node['manifest:iteration-count'].to_i
  salt = Base64.decode64(key_derivation_node['manifest:salt'])

  # The password is hashed with the algorithm named by this URI.
  key_generation_name = start_key_generation_node['manifest:start-key-generation-name']
  unless key_generation_name == 'http://www.w3.org/2000/09/xmldsig#sha256'
    fail ArgumentError, "Unknown key generation algorithm #{key_generation_name}"
  end
  hashed_password = Digest::SHA256.digest(password)

  cipher = find_cipher(algorithm, key_derivation_name, hashed_password, salt, iteration_count, iv)

  begin
    decrypted = decrypt(content_entry, cipher)

    # The decrypted stream is a raw deflate stream; inflate it and
    # overwrite content.xml.
    IO.binwrite(
      roo_content_xml_path,
      Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(decrypted)
    )
  rescue StandardError => error
    raise ArgumentError, "Invalid password or other data error: #{error}"
  end
end
343
+
344
# Creates a decrypting cipher for an ODS algorithm URI from manifest.xml.
#
# Positional params: algorithm, key_derivation_name, hashed_password, salt,
# iteration_count, iv.
#
# Returns an OpenSSL::Cipher in decrypt mode with padding disabled and the
# key and IV set.
# Raises ArgumentError when the algorithm URI is not AES-256-CBC. The
# message names the rejected algorithm; the previous code referenced an
# undefined local here and raised NameError instead.
def find_cipher(*args)
  algorithm, key_derivation_name, hashed_password, salt, iteration_count, iv = args
  unless algorithm == 'http://www.w3.org/2001/04/xmlenc#aes256-cbc'
    fail ArgumentError, "Unknown algorithm #{algorithm}"
  end

  cipher = ::OpenSSL::Cipher.new('AES-256-CBC')
  cipher.decrypt
  cipher.padding = 0
  cipher.key = find_cipher_key(cipher, key_derivation_name, hashed_password, salt, iteration_count)
  cipher.iv = iv

  cipher
end
357
+
358
# Derives the cipher key for an ODS key-derivation name from manifest.xml.
#
# Positional params: cipher, key_derivation_name, hashed_password, salt,
# iteration_count.
#
# Returns the PBKDF2-HMAC-SHA1 key, cipher.key_len bytes long.
# Raises ArgumentError when the derivation name is not 'PBKDF2'. The name is
# now part of the message; it used to be passed as raise's backtrace
# argument and never appeared in the text.
def find_cipher_key(*args)
  cipher, key_derivation_name, hashed_password, salt, iteration_count = args
  unless key_derivation_name == 'PBKDF2'
    fail ArgumentError, "Unknown key derivation name #{key_derivation_name}"
  end

  ::OpenSSL::PKCS5.pbkdf2_hmac_sha1(hashed_password, salt, iteration_count, cipher.key_len)
end
364
+
365
# Block-decrypts the raw bytes of a zip entry with the given cipher.
#
# Zip::Entry.extract writes a 0-length file when asked to extract an
# encrypted stream, so the bytes are read straight from @filename using the
# entry's local header offset, header size and compressed size.
#
# content_entry - responds to local_header_offset,
#                 calculate_local_header_size and compressed_size.
# cipher        - a configured decrypting cipher (update / final).
#
# Returns the decrypted bytes as a binary String. A zero-length entry yields
# only cipher.final; previously an empty chunk was passed to cipher.update,
# which rejects empty data.
def decrypt(content_entry, cipher)
  decrypted = String.new
  File.open(@filename, 'rb') do |zipfile|
    zipfile.seek(
      content_entry.local_header_offset +
        content_entry.calculate_local_header_size
    )
    remaining = content_entry.compressed_size

    while remaining > 0
      buffer = zipfile.read([4096, remaining].min)
      break if buffer.nil? || buffer.empty? # premature end of file

      # Append in place instead of allocating a new string per chunk.
      decrypted << cipher.update(buffer)
      remaining -= buffer.length
    end
  end

  decrypted << cipher.final
end
393
+
394
# Parsed roo_content.xml from the temp directory; loaded once and memoized.
def doc
  return @doc if @doc

  @doc = ::Roo::Utils.load_xml(File.join(@tmpdir, 'roo_content.xml'))
end
397
+
398
# Reads the version attribute of every document-content element into
# @officeversion (the last one found wins).
def oo_version
  doc.xpath("//*[local-name()='document-content']").each { |root| @officeversion = attribute(root, 'version') }
end
404
+
405
# Helper that stores one parsed cell in the internal tables
# (@cell_type, @formula, @style, @cell), keyed by [y, x + i].
#
# sheet      - sheet name used as the outer hash key
# x, y       - column and row of the cell; i is added to the column
#              (offset used for repeated columns)
# v          - raw value text
# value_type - value-type text; stored as a Symbol when present
# formula    - formula text or nil; a leading 'of:' / 'oooc:' is stripped
# table_cell - the cell node (attributes and children are consulted)
# str_v      - assembled text, used for :string cells
# style_name - style name recorded for the cell
def set_cell_values(sheet, x, y, i, v, value_type, formula, table_cell, str_v, style_name)
  key = [y, x + i]
  @cell_type[sheet] ||= {}
  @cell_type[sheet][key] = value_type.to_sym if value_type
  @formula[sheet] ||= {}
  if formula
    ['of:', 'oooc:'].each do |prefix|
      formula = formula[prefix.length..-1] if formula.start_with?(prefix)
    end
    @formula[sheet][key] = formula
  end
  @cell[sheet] ||= {}
  @style[sheet] ||= {}
  @style[sheet][key] = style_name
  case @cell_type[sheet][key]
  when :float
    # A '.' in the value attribute or in the first child's text makes the
    # cell a Float, otherwise an Integer. A cell without child nodes has no
    # text to inspect; guard against that instead of calling #text on nil.
    first_child = table_cell.children.first
    fractional = table_cell.attributes['value'].to_s.include?('.') ||
                 (first_child ? first_child.text.include?('.') : false)
    @cell[sheet][key] = fractional ? v.to_f : v.to_i
  when :percentage
    @cell[sheet][key] = v.to_f
  when :string
    @cell[sheet][key] = str_v
  when :date
    # TODO: if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size
    if attribute(table_cell, 'date-value').size != 'XXXX-XX-XX'.size
      # A time part is present as well, e.g. "1961-11-21T12:17:18"
      @cell[sheet][key] = DateTime.parse(attribute(table_cell, 'date-value').to_s)
      @cell_type[sheet][key] = :datetime
    else
      @cell[sheet][key] = table_cell.attributes['date-value']
    end
  when :time
    # "HH:MM:SS" converted to seconds
    hms = v.split(':')
    @cell[sheet][key] = hms[0].to_i * 3600 + hms[1].to_i * 60 + hms[2].to_i
  else
    @cell[sheet][key] = v
  end
end
446
+
447
# Reads all cells of the selected sheet into the internal tables and marks
# the sheet (and its comments) as read. Does nothing when the sheet was
# already read.
#
# Walks the table element whose name equals +sheet+: 'table-column'
# children contribute column default styles, 'table-row' children
# contribute cells. Cell annotations are stored as comments on the way.
# Afterwards the automatic styles are passed to read_styles.
#
# Raises RangeError when no table with that name exists in the document
# (validate_sheet! runs first and may raise on its own).
#--
# the following construct means '4 blanks'
# some content <text:s text:c="3"/>
#++
def read_cells(sheet = default_sheet)
  validate_sheet!(sheet)
  return if @cells_read[sheet]

  sheet_found = false
  doc.xpath("//*[local-name()='table']").each do |ws|
    next unless sheet == attribute(ws, 'name')

    sheet_found = true
    col = 1
    row = 1
    ws.children.each do |table_element|
      case table_element.name
      when 'table-column'
        # default style for the cells of this column
        @style_defaults[sheet] << table_element.attributes['default-cell-style-name']
      when 'table-row'
        # a repeated row advances the row counter by the repeat count
        # (the trailing `row += 1` below accounts for the last one)
        if table_element.attributes['number-rows-repeated']
          skip_row = attribute(table_element, 'number-rows-repeated').to_s.to_i
          row = row + skip_row - 1
        end
        table_element.children.each do |cell|
          skip_col = attribute(cell, 'number-columns-repeated')
          formula = attribute(cell, 'formula')
          value_type = attribute(cell, 'value-type')
          v = attribute(cell, 'value')
          style_name = attribute(cell, 'style-name')
          case value_type
          when 'string'
            str_v = ''
            # insert \n if there is more than one paragraph
            para_count = 0
            cell.children.each do |str|
              # begin comments
              #
              # Example of a cell carrying an annotation:
              #- <table:table-cell office:value-type="string">
              #  - <office:annotation office:display="true" draw:style-name="gr1" draw:text-style-name="P1" svg:width="1.1413in" svg:height="0.3902in" svg:x="2.0142in" svg:y="0in" draw:caption-point-x="-0.2402in" draw:caption-point-y="0.5661in">
              #  <dc:date>2011-09-20T00:00:00</dc:date>
              #  <text:p text:style-name="P1">Kommentar fuer B4</text:p>
              #  </office:annotation>
              #  <text:p>B4 (mit Kommentar)</text:p>
              #  </table:table-cell>
              if str.name == 'annotation'
                str.children.each do |annotation|
                  next unless annotation.name == 'p'
                  # @comment is a hash keyed by sheet (like @cell); each
                  # entry is a further hash keyed by [row, col] whose
                  # value is the actual comment text
                  @comment[sheet] = Hash.new unless @comment[sheet]
                  key = [row, col]
                  @comment[sheet][key] = annotation.text
                end
              end
              # end comments
              if str.name == 'p'
                v = str.content
                str_v += "\n" if para_count > 0
                para_count += 1
                if str.children.size > 1
                  str_v += children_to_string(str.children)
                else
                  str.children.each do |child|
                    str_v += child.content #.text
                  end
                end
                str_v.gsub!(/&apos;/, "'") # special case not supported by unescapeHTML
                str_v = CGI.unescapeHTML(str_v)
              end # == 'p'
            end
          when 'time'
            # the displayed paragraph text is used as the value
            cell.children.each do |str|
              v = str.content if str.name == 'p'
            end
          when '', nil, 'date', 'percentage', 'float'
            # value attribute is used as is
          when 'boolean'
            v = attribute(cell, 'boolean-value').to_s
          end
          if skip_col
            # a repeated cell with content is stored once per repetition;
            # empty repeated cells only advance the column counter
            if !v.nil? || cell.attributes['date-value']
              0.upto(skip_col.to_i - 1) do |i|
                set_cell_values(sheet, col, row, i, v, value_type, formula, cell, str_v, style_name)
              end
            end
            col += (skip_col.to_i - 1)
          end # if skip
          set_cell_values(sheet, col, row, 0, v, value_type, formula, cell, str_v, style_name)
          col += 1
        end
        row += 1
        col = 1
      end
    end
  end
  doc.xpath("//*[local-name()='automatic-styles']").each do |style|
    read_styles(style)
  end

  fail RangeError unless sheet_found

  @cells_read[sheet] = true
  @comments_read[sheet] = true
end
556
+
557
# Delegates to read_cells: Roo::Base calls read_comments, but for
# Roo::OpenOffice objects comments are collected while the cells are read.
def read_comments(sheet = nil)
  read_cells(sheet)
end
562
+
563
# Builds (once) the label table from the document's named ranges:
#   { name => [sheetname, row, col] }
# A cell-range-address looks like "$Sheet1.$C$5"; the leading '$' of the
# sheet name is removed, row and col are kept as strings.
def read_labels
  @label ||= doc.xpath('//table:named-range').each_with_object({}) do |named_range, table|
    name = attribute(named_range, 'name').to_s
    sheetname, coords = attribute(named_range, 'cell-range-address').to_s.split('.$')
    col, row = coords.split('$')
    sheetname = sheetname[1..-1] if sheetname[0, 1] == '$'
    table[name] = [sheetname, row, col]
  end
end
575
+
576
# Registers font definitions in @font_style_definitions.
#
# Always (re)sets the 'Default' entry to a fresh Roo::Font. Then, for each
# yielded element named 'style', a font is built from the font-weight,
# font-style and text-underline-style attributes of whatever the inner
# #each yields, and stored under the style's name.
#
# NOTE(review): this iterates with style_elements.each / style.each, while
# read_table_styles walks .children of the same kind of node — confirm what
# #each yields for the nodes read_cells passes in.
# NOTE(review): the default uses Roo::Font but per-style fonts use
# Roo::OpenOffice::Font, which is not defined in the visible source —
# confirm that constant resolves.
def read_styles(style_elements)
  @font_style_definitions['Default'] = Roo::Font.new
  style_elements.each do |style|
    next unless style.name == 'style'
    style_name = attribute(style, 'name')
    style.each do |properties|
      font = Roo::OpenOffice::Font.new
      font.bold = attribute(properties, 'font-weight')
      font.italic = attribute(properties, 'font-style')
      font.underline = attribute(properties, 'text-underline-style')
      @font_style_definitions[style_name] = font
    end
  end
end
590
+
591
# Records, per table style name, whether the table is displayed.
# Only children named 'style' are considered; a style's display flag is set
# from any child that carries a 'display' attribute ('true' => true,
# anything else => false).
def read_table_styles(styles)
  styles.children.each do |node|
    next if node.name != 'style'

    table_style = attribute(node, 'name')
    node.children.each do |props|
      flag = attribute(props, 'display')
      @table_display[table_style] = (flag == 'true') if flag
    end
  end
end
602
+
603
# Helper that flattens the child nodes of a text element into one string.
# Text nodes and ordinary elements contribute their content; an 's' element
# (compressed spaces) contributes as many blanks as its 'c' attribute says,
# or a single blank when no count is given.
# FIXME: add a test for compressed_spaces == 0. It's not currently tested.
def children_to_string(children)
  pieces = children.map do |child|
    next child.content if child.text? || child.name != 's'

    count = child.attributes['c'].to_s.to_i
    # no explicit number means a count of 1:
    count.zero? ? ' ' : ' ' * count
  end
  pieces.join
end
621
+
622
# Value of the named attribute of a node, or nil when the node has no such
# attribute.
def attribute(node, attr_name)
  found = node.attributes[attr_name]
  found.value if found
end
625
+ end
626
+ end
@@ -1,32 +1,31 @@
1
+ require 'uri'
2
+
1
3
  module Roo
2
4
  class Spreadsheet
3
5
  class << self
4
- def open(file, options = {})
5
- file = File === file ? file.path : file
6
+ def open(path, options = {})
7
+ path = path.respond_to?(:path) ? path.path : path
8
+ extension = extension_for(path, options)
6
9
 
7
- extension =
8
- if options[:extension]
9
- options[:file_warning] = :ignore
10
- ".#{options[:extension]}".gsub(/[.]+/, ".")
11
- else
12
- File.extname(URI.parse(file).path)
13
- end
10
+ begin
11
+ Roo::CLASS_FOR_EXTENSION.fetch(extension).new(path, options)
12
+ rescue KeyError
13
+ raise ArgumentError,
14
+ "Can't detect the type of #{path} - please use the :extension option to declare its type."
15
+ end
16
+ end
14
17
 
15
- case extension.downcase
16
- when '.xls'
17
- Roo::Excel.new(file, options)
18
- when '.xlsx'
19
- Roo::Excelx.new(file, options)
20
- when '.ods'
21
- Roo::OpenOffice.new(file, options)
22
- when '.xml'
23
- Roo::Excel2003XML.new(file, options)
24
- when ''
25
- Roo::Google.new(file, options)
26
- when '.csv'
27
- Roo::CSV.new(file, options)
18
# Determines the spreadsheet type as a Symbol (e.g. :xlsx, :ods).
#
# path    - file path or URL string
# options - Hash; :extension is removed from it. When :extension is given
#           (Symbol or String) it decides the type and :file_warning is set
#           to :ignore; a String is stripped of dots and downcased.
#
# Without :extension the type is taken from the path's file extension. For
# URLs only the path component is considered, so a query string does not
# leak into the result. URI::DEFAULT_PARSER is used because URI.regexp and
# URI.encode are obsolete and no longer exist in Ruby 3.
def extension_for(path, options)
  case (extension = options.delete(:extension))
  when ::Symbol
    options[:file_warning] = :ignore
    extension
  when ::String
    options[:file_warning] = :ignore
    extension.tr('.', '').downcase.to_sym
  else
    parser = ::URI::DEFAULT_PARSER
    local_path =
      if path =~ /\A#{parser.make_regexp}\z/
        ::URI.parse(parser.escape(path)).path
      else
        path
      end
    ::File.extname(local_path).tr('.', '').downcase.to_sym
  end
end
32
31
  end
@@ -0,0 +1,21 @@
1
module Roo
  # Bookkeeping for temporary directories, keyed by the object_id of the
  # object they were created for. State lives in @tempdirs on whatever
  # object (or class) mixes this module in.
  module Tempdir
    # Removes every temp directory registered for object_id and forgets
    # the registration. Does nothing when nothing was ever registered.
    def finalize_tempdirs(object_id)
      return unless @tempdirs

      doomed = @tempdirs[object_id]
      return unless doomed

      @tempdirs.delete(object_id)
      doomed.each { |dir| ::FileUtils.remove_entry(dir) }
    end

    # Creates a temp directory named "#{Roo::TEMP_PREFIX}#{prefix}..." below
    # root (falling back to ENV["ROO_TMP"]), registers it for +object+ and
    # returns its path.
    # NOTE: This folder is cleaned up by finalize_tempdirs.
    def make_tempdir(object, prefix, root)
      root ||= ENV["ROO_TMP"]
      created = ::Dir.mktmpdir("#{Roo::TEMP_PREFIX}#{prefix}", root)
      @tempdirs ||= Hash.new { |registry, id| registry[id] = [] }
      @tempdirs[object.object_id] << created
      created
    end
  end
end