roo 1.13.2 → 2.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. checksums.yaml +5 -5
  2. data/.codeclimate.yml +17 -0
  3. data/.github/issue_template.md +16 -0
  4. data/.github/pull_request_template.md +14 -0
  5. data/.github/workflows/pull-request.yml +15 -0
  6. data/.github/workflows/ruby.yml +34 -0
  7. data/.gitignore +11 -0
  8. data/.rubocop.yml +186 -0
  9. data/.simplecov +4 -0
  10. data/CHANGELOG.md +702 -0
  11. data/Gemfile +18 -12
  12. data/Guardfile +23 -0
  13. data/LICENSE +5 -1
  14. data/README.md +328 -0
  15. data/Rakefile +23 -23
  16. data/examples/roo_soap_client.rb +28 -31
  17. data/examples/roo_soap_server.rb +4 -6
  18. data/examples/write_me.rb +9 -10
  19. data/lib/roo/base.rb +317 -504
  20. data/lib/roo/constants.rb +7 -0
  21. data/lib/roo/csv.rb +141 -113
  22. data/lib/roo/errors.rb +11 -0
  23. data/lib/roo/excelx/cell/base.rb +108 -0
  24. data/lib/roo/excelx/cell/boolean.rb +30 -0
  25. data/lib/roo/excelx/cell/date.rb +28 -0
  26. data/lib/roo/excelx/cell/datetime.rb +107 -0
  27. data/lib/roo/excelx/cell/empty.rb +20 -0
  28. data/lib/roo/excelx/cell/number.rb +99 -0
  29. data/lib/roo/excelx/cell/string.rb +19 -0
  30. data/lib/roo/excelx/cell/time.rb +44 -0
  31. data/lib/roo/excelx/cell.rb +110 -0
  32. data/lib/roo/excelx/comments.rb +55 -0
  33. data/lib/roo/excelx/coordinate.rb +19 -0
  34. data/lib/roo/excelx/extractor.rb +39 -0
  35. data/lib/roo/excelx/format.rb +71 -0
  36. data/lib/roo/excelx/images.rb +26 -0
  37. data/lib/roo/excelx/relationships.rb +33 -0
  38. data/lib/roo/excelx/shared.rb +39 -0
  39. data/lib/roo/excelx/shared_strings.rb +151 -0
  40. data/lib/roo/excelx/sheet.rb +151 -0
  41. data/lib/roo/excelx/sheet_doc.rb +257 -0
  42. data/lib/roo/excelx/styles.rb +64 -0
  43. data/lib/roo/excelx/workbook.rb +64 -0
  44. data/lib/roo/excelx.rb +407 -601
  45. data/lib/roo/font.rb +17 -0
  46. data/lib/roo/formatters/base.rb +15 -0
  47. data/lib/roo/formatters/csv.rb +84 -0
  48. data/lib/roo/formatters/matrix.rb +23 -0
  49. data/lib/roo/formatters/xml.rb +31 -0
  50. data/lib/roo/formatters/yaml.rb +40 -0
  51. data/lib/roo/helpers/default_attr_reader.rb +20 -0
  52. data/lib/roo/helpers/weak_instance_cache.rb +41 -0
  53. data/lib/roo/libre_office.rb +4 -0
  54. data/lib/roo/link.rb +34 -0
  55. data/lib/roo/open_office.rb +631 -0
  56. data/lib/roo/spreadsheet.rb +28 -23
  57. data/lib/roo/tempdir.rb +24 -0
  58. data/lib/roo/utils.rb +128 -0
  59. data/lib/roo/version.rb +3 -0
  60. data/lib/roo.rb +26 -24
  61. data/roo.gemspec +29 -203
  62. data/spec/helpers.rb +5 -0
  63. data/spec/lib/roo/base_spec.rb +291 -3
  64. data/spec/lib/roo/csv_spec.rb +38 -11
  65. data/spec/lib/roo/excelx/cell/time_spec.rb +15 -0
  66. data/spec/lib/roo/excelx/format_spec.rb +7 -6
  67. data/spec/lib/roo/excelx/relationships_spec.rb +43 -0
  68. data/spec/lib/roo/excelx/sheet_doc_spec.rb +11 -0
  69. data/spec/lib/roo/excelx_spec.rb +672 -11
  70. data/spec/lib/roo/libreoffice_spec.rb +16 -6
  71. data/spec/lib/roo/openoffice_spec.rb +30 -8
  72. data/spec/lib/roo/spreadsheet_spec.rb +60 -12
  73. data/spec/lib/roo/strict_spec.rb +43 -0
  74. data/spec/lib/roo/utils_spec.rb +119 -0
  75. data/spec/lib/roo/weak_instance_cache_spec.rb +92 -0
  76. data/spec/lib/roo_spec.rb +0 -0
  77. data/spec/spec_helper.rb +7 -6
  78. data/test/all_ss.rb +12 -11
  79. data/test/excelx/cell/test_attr_reader_default.rb +72 -0
  80. data/test/excelx/cell/test_base.rb +68 -0
  81. data/test/excelx/cell/test_boolean.rb +36 -0
  82. data/test/excelx/cell/test_date.rb +38 -0
  83. data/test/excelx/cell/test_datetime.rb +45 -0
  84. data/test/excelx/cell/test_empty.rb +18 -0
  85. data/test/excelx/cell/test_number.rb +90 -0
  86. data/test/excelx/cell/test_string.rb +48 -0
  87. data/test/excelx/cell/test_time.rb +30 -0
  88. data/test/excelx/test_coordinate.rb +51 -0
  89. data/test/formatters/test_csv.rb +136 -0
  90. data/test/formatters/test_matrix.rb +76 -0
  91. data/test/formatters/test_xml.rb +78 -0
  92. data/test/formatters/test_yaml.rb +20 -0
  93. data/test/helpers/test_accessing_files.rb +81 -0
  94. data/test/helpers/test_comments.rb +43 -0
  95. data/test/helpers/test_formulas.rb +9 -0
  96. data/test/helpers/test_labels.rb +103 -0
  97. data/test/helpers/test_sheets.rb +55 -0
  98. data/test/helpers/test_styles.rb +62 -0
  99. data/test/roo/test_base.rb +182 -0
  100. data/test/roo/test_csv.rb +88 -0
  101. data/test/roo/test_excelx.rb +360 -0
  102. data/test/roo/test_libre_office.rb +9 -0
  103. data/test/roo/test_open_office.rb +289 -0
  104. data/test/test_helper.rb +123 -59
  105. data/test/test_roo.rb +392 -2292
  106. metadata +153 -298
  107. data/CHANGELOG +0 -417
  108. data/Gemfile.lock +0 -78
  109. data/README.markdown +0 -126
  110. data/VERSION +0 -1
  111. data/lib/roo/excel.rb +0 -355
  112. data/lib/roo/excel2003xml.rb +0 -300
  113. data/lib/roo/google.rb +0 -292
  114. data/lib/roo/openoffice.rb +0 -496
  115. data/lib/roo/roo_rails_helper.rb +0 -83
  116. data/lib/roo/worksheet.rb +0 -18
  117. data/scripts/txt2html +0 -67
  118. data/spec/lib/roo/excel2003xml_spec.rb +0 -15
  119. data/spec/lib/roo/excel_spec.rb +0 -17
  120. data/spec/lib/roo/google_spec.rb +0 -64
  121. data/test/files/1900_base.xls +0 -0
  122. data/test/files/1900_base.xlsx +0 -0
  123. data/test/files/1904_base.xls +0 -0
  124. data/test/files/1904_base.xlsx +0 -0
  125. data/test/files/Bibelbund.csv +0 -3741
  126. data/test/files/Bibelbund.ods +0 -0
  127. data/test/files/Bibelbund.xls +0 -0
  128. data/test/files/Bibelbund.xlsx +0 -0
  129. data/test/files/Bibelbund.xml +0 -62518
  130. data/test/files/Bibelbund1.ods +0 -0
  131. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  132. data/test/files/bad_excel_date.xls +0 -0
  133. data/test/files/bbu.ods +0 -0
  134. data/test/files/bbu.xls +0 -0
  135. data/test/files/bbu.xlsx +0 -0
  136. data/test/files/bbu.xml +0 -152
  137. data/test/files/bode-v1.ods.zip +0 -0
  138. data/test/files/bode-v1.xls.zip +0 -0
  139. data/test/files/boolean.csv +0 -2
  140. data/test/files/boolean.ods +0 -0
  141. data/test/files/boolean.xls +0 -0
  142. data/test/files/boolean.xlsx +0 -0
  143. data/test/files/boolean.xml +0 -112
  144. data/test/files/borders.ods +0 -0
  145. data/test/files/borders.xls +0 -0
  146. data/test/files/borders.xlsx +0 -0
  147. data/test/files/borders.xml +0 -144
  148. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  149. data/test/files/bug-row-column-fixnum-float.xls +0 -0
  150. data/test/files/bug-row-column-fixnum-float.xml +0 -127
  151. data/test/files/comments.ods +0 -0
  152. data/test/files/comments.xls +0 -0
  153. data/test/files/comments.xlsx +0 -0
  154. data/test/files/csvtypes.csv +0 -1
  155. data/test/files/datetime.ods +0 -0
  156. data/test/files/datetime.xls +0 -0
  157. data/test/files/datetime.xlsx +0 -0
  158. data/test/files/datetime.xml +0 -142
  159. data/test/files/datetime_floatconv.xls +0 -0
  160. data/test/files/datetime_floatconv.xml +0 -148
  161. data/test/files/dreimalvier.ods +0 -0
  162. data/test/files/emptysheets.ods +0 -0
  163. data/test/files/emptysheets.xls +0 -0
  164. data/test/files/emptysheets.xlsx +0 -0
  165. data/test/files/emptysheets.xml +0 -105
  166. data/test/files/excel2003.xml +0 -21140
  167. data/test/files/false_encoding.xls +0 -0
  168. data/test/files/false_encoding.xml +0 -132
  169. data/test/files/file_item_error.xlsx +0 -0
  170. data/test/files/formula.ods +0 -0
  171. data/test/files/formula.xls +0 -0
  172. data/test/files/formula.xlsx +0 -0
  173. data/test/files/formula.xml +0 -134
  174. data/test/files/formula_parse_error.xls +0 -0
  175. data/test/files/formula_parse_error.xml +0 -1833
  176. data/test/files/formula_string_error.xlsx +0 -0
  177. data/test/files/html-escape.ods +0 -0
  178. data/test/files/link.xls +0 -0
  179. data/test/files/link.xlsx +0 -0
  180. data/test/files/matrix.ods +0 -0
  181. data/test/files/matrix.xls +0 -0
  182. data/test/files/named_cells.ods +0 -0
  183. data/test/files/named_cells.xls +0 -0
  184. data/test/files/named_cells.xlsx +0 -0
  185. data/test/files/no_spreadsheet_file.txt +0 -1
  186. data/test/files/numbers1.csv +0 -18
  187. data/test/files/numbers1.ods +0 -0
  188. data/test/files/numbers1.xls +0 -0
  189. data/test/files/numbers1.xlsx +0 -0
  190. data/test/files/numbers1.xml +0 -312
  191. data/test/files/numeric-link.xlsx +0 -0
  192. data/test/files/only_one_sheet.ods +0 -0
  193. data/test/files/only_one_sheet.xls +0 -0
  194. data/test/files/only_one_sheet.xlsx +0 -0
  195. data/test/files/only_one_sheet.xml +0 -67
  196. data/test/files/paragraph.ods +0 -0
  197. data/test/files/paragraph.xls +0 -0
  198. data/test/files/paragraph.xlsx +0 -0
  199. data/test/files/paragraph.xml +0 -127
  200. data/test/files/prova.xls +0 -0
  201. data/test/files/ric.ods +0 -0
  202. data/test/files/simple_spreadsheet.ods +0 -0
  203. data/test/files/simple_spreadsheet.xls +0 -0
  204. data/test/files/simple_spreadsheet.xlsx +0 -0
  205. data/test/files/simple_spreadsheet.xml +0 -225
  206. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  207. data/test/files/simple_spreadsheet_from_italo.xls +0 -0
  208. data/test/files/simple_spreadsheet_from_italo.xml +0 -242
  209. data/test/files/so_datetime.csv +0 -7
  210. data/test/files/style.ods +0 -0
  211. data/test/files/style.xls +0 -0
  212. data/test/files/style.xlsx +0 -0
  213. data/test/files/style.xml +0 -154
  214. data/test/files/time-test.csv +0 -2
  215. data/test/files/time-test.ods +0 -0
  216. data/test/files/time-test.xls +0 -0
  217. data/test/files/time-test.xlsx +0 -0
  218. data/test/files/time-test.xml +0 -131
  219. data/test/files/type_excel.ods +0 -0
  220. data/test/files/type_excel.xlsx +0 -0
  221. data/test/files/type_excelx.ods +0 -0
  222. data/test/files/type_excelx.xls +0 -0
  223. data/test/files/type_openoffice.xls +0 -0
  224. data/test/files/type_openoffice.xlsx +0 -0
  225. data/test/files/whitespace.ods +0 -0
  226. data/test/files/whitespace.xls +0 -0
  227. data/test/files/whitespace.xlsx +0 -0
  228. data/test/files/whitespace.xml +0 -184
  229. data/test/rm_sub_test.rb +0 -12
  230. data/test/rm_test.rb +0 -7
  231. data/test/test_generic_spreadsheet.rb +0 -259
  232. data/website/index.html +0 -385
  233. data/website/index.txt +0 -423
  234. data/website/javascripts/rounded_corners_lite.inc.js +0 -285
  235. data/website/stylesheets/screen.css +0 -130
  236. data/website/template.rhtml +0 -48
data/lib/roo/excelx.rb CHANGED
@@ -1,674 +1,480 @@
1
- require 'date'
2
1
  require 'nokogiri'
3
- require 'spreadsheet'
4
-
5
- class Roo::Excelx < Roo::Base
6
- module Format
7
- EXCEPTIONAL_FORMATS = {
8
- 'h:mm am/pm' => :date,
9
- 'h:mm:ss am/pm' => :date,
10
- }
11
-
12
- STANDARD_FORMATS = {
13
- 0 => 'General',
14
- 1 => '0',
15
- 2 => '0.00',
16
- 3 => '#,##0',
17
- 4 => '#,##0.00',
18
- 9 => '0%',
19
- 10 => '0.00%',
20
- 11 => '0.00E+00',
21
- 12 => '# ?/?',
22
- 13 => '# ??/??',
23
- 14 => 'mm-dd-yy',
24
- 15 => 'd-mmm-yy',
25
- 16 => 'd-mmm',
26
- 17 => 'mmm-yy',
27
- 18 => 'h:mm AM/PM',
28
- 19 => 'h:mm:ss AM/PM',
29
- 20 => 'h:mm',
30
- 21 => 'h:mm:ss',
31
- 22 => 'm/d/yy h:mm',
32
- 37 => '#,##0 ;(#,##0)',
33
- 38 => '#,##0 ;[Red](#,##0)',
34
- 39 => '#,##0.00;(#,##0.00)',
35
- 40 => '#,##0.00;[Red](#,##0.00)',
36
- 45 => 'mm:ss',
37
- 46 => '[h]:mm:ss',
38
- 47 => 'mmss.0',
39
- 48 => '##0.0E+0',
40
- 49 => '@',
41
- }
42
-
43
- def to_type(format)
44
- format = format.to_s.downcase
45
- if type = EXCEPTIONAL_FORMATS[format]
46
- type
47
- elsif format.include?('#')
48
- :float
49
- elsif format.include?('d') || format.include?('y')
50
- if format.include?('h') || format.include?('s')
51
- :datetime
52
- else
53
- :date
54
- end
55
- elsif format.include?('h') || format.include?('s')
56
- :time
57
- elsif format.include?('%')
58
- :percentage
59
- else
60
- :float
2
+ require 'zip/filesystem'
3
+ require 'roo/link'
4
+ require 'roo/tempdir'
5
+ require 'roo/utils'
6
+ require 'forwardable'
7
+ require 'set'
8
+
9
+ module Roo
10
+ class Excelx < Roo::Base
11
+ extend Roo::Tempdir
12
+ extend Forwardable
13
+
14
+ ERROR_VALUES = %w(#N/A #REF! #NAME? #DIV/0! #NULL! #VALUE! #NUM!).to_set
15
+
16
+ require 'roo/excelx/shared'
17
+ require 'roo/excelx/workbook'
18
+ require 'roo/excelx/shared_strings'
19
+ require 'roo/excelx/styles'
20
+ require 'roo/excelx/cell'
21
+ require 'roo/excelx/sheet'
22
+ require 'roo/excelx/relationships'
23
+ require 'roo/excelx/comments'
24
+ require 'roo/excelx/sheet_doc'
25
+ require 'roo/excelx/coordinate'
26
+ require 'roo/excelx/format'
27
+ require 'roo/excelx/images'
28
+
29
+ delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels, :image_files] => :@shared
30
+ ExceedsMaxError = Class.new(StandardError)
31
+
32
+ # initialization and opening of a spreadsheet file
33
+ # values for packed: :zip
34
+ # optional cell_max (int) parameter for early aborting attempts to parse
35
+ # enormous documents.
36
+ def initialize(filename_or_stream, options = {})
37
+ packed = options[:packed]
38
+ file_warning = options.fetch(:file_warning, :error)
39
+ cell_max = options.delete(:cell_max)
40
+ sheet_options = {}
41
+ sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
42
+ sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false)
43
+ sheet_options[:empty_cell] = (options[:empty_cell] || false)
44
+ shared_options = {}
45
+
46
+ shared_options[:disable_html_wrapper] = (options[:disable_html_wrapper] || false)
47
+ unless is_stream?(filename_or_stream)
48
+ file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
49
+ basename = find_basename(filename_or_stream)
61
50
  end
62
- end
63
51
 
64
- module_function :to_type
65
- end
52
+ # NOTE: Create temp directory and allow Ruby to cleanup the temp directory
53
+ # when the object is garbage collected. Initially, the finalizer was
54
+ # created in the Roo::Tempdir module, but that led to a segfault
55
+ # when testing in Ruby 2.4.0.
56
+ @tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
57
+ ObjectSpace.define_finalizer(self, self.class.finalize(object_id))
66
58
 
67
- # initialization and opening of a spreadsheet file
68
- # values for packed: :zip
69
- def initialize(filename, options = {}, deprecated_file_warning = :error)
70
- if Hash === options
71
- packed = options[:packed]
72
- file_warning = options[:file_warning] || :error
73
- else
74
- warn 'Supplying `packed` or `file_warning` as separate arguments to `Roo::Excelx.new` is deprecated. Use an options hash instead.'
75
- packed = options
76
- file_warning = deprecated_file_warning
77
- end
78
-
79
- file_type_check(filename,'.xlsx','an Excel-xlsx', file_warning, packed)
80
- make_tmpdir do |tmpdir|
81
- filename = download_uri(filename, tmpdir) if uri?(filename)
82
- filename = unzip(filename, tmpdir) if packed == :zip
83
- @filename = filename
84
- unless File.file?(@filename)
85
- raise IOError, "file #{@filename} does not exist"
86
- end
87
- @comments_files = Array.new
88
- @rels_files = Array.new
89
- extract_content(tmpdir, @filename)
90
- @workbook_doc = load_xml(File.join(tmpdir, "roo_workbook.xml"))
91
- @shared_table = []
92
- if File.exist?(File.join(tmpdir, 'roo_sharedStrings.xml'))
93
- @sharedstring_doc = load_xml(File.join(tmpdir, 'roo_sharedStrings.xml'))
94
- read_shared_strings(@sharedstring_doc)
59
+ @shared = Shared.new(@tmpdir, shared_options)
60
+ @filename = local_filename(filename_or_stream, @tmpdir, packed)
61
+ process_zipfile(@filename || filename_or_stream)
62
+
63
+ @sheet_names = []
64
+ @sheets = []
65
+ @sheets_by_name = {}
66
+
67
+ workbook.sheets.each_with_index do |sheet, index|
68
+ next if options[:only_visible_sheets] && sheet['state'] == 'hidden'
69
+
70
+ sheet_name = sheet['name']
71
+ @sheet_names << sheet_name
72
+ @sheets_by_name[sheet_name] = @sheets[index] = Sheet.new(sheet_name, @shared, index, sheet_options)
95
73
  end
96
- @styles_table = []
97
- @style_definitions = Array.new # TODO: ??? { |h,k| h[k] = {} }
98
- if File.exist?(File.join(tmpdir, 'roo_styles.xml'))
99
- @styles_doc = load_xml(File.join(tmpdir, 'roo_styles.xml'))
100
- read_styles(@styles_doc)
74
+
75
+ if cell_max
76
+ cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions)
77
+ raise ExceedsMaxError.new("Excel file exceeds cell maximum: #{cell_count} > #{cell_max}") if cell_count > cell_max
101
78
  end
102
- @sheet_doc = load_xmls(@sheet_files)
103
- @comments_doc = load_xmls(@comments_files)
104
- @rels_doc = load_xmls(@rels_files)
105
- end
106
- super(filename, options)
107
- @formula = Hash.new
108
- @excelx_type = Hash.new
109
- @excelx_value = Hash.new
110
- @s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt
111
- @comment = Hash.new
112
- @comments_read = Hash.new
113
- @hyperlink = Hash.new
114
- @hyperlinks_read = Hash.new
115
- end
116
79
 
117
- def method_missing(m,*args)
118
- # is method name a label name
119
- read_labels
120
- if @label.has_key?(m.to_s)
121
- sheet ||= @default_sheet
122
- read_cells(sheet)
123
- row,col = label(m.to_s)
124
- cell(row,col)
125
- else
126
- # call super for methods like #a1
127
80
  super
81
+ rescue
82
+ self.class.finalize_tempdirs(object_id)
83
+ raise
128
84
  end
129
- end
130
85
 
131
- # Returns the content of a spreadsheet-cell.
132
- # (1,1) is the upper left corner.
133
- # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
134
- # cell at the first line and first row.
135
- def cell(row, col, sheet=nil)
136
- sheet ||= @default_sheet
137
- read_cells(sheet)
138
- row,col = normalize(row,col)
139
- if celltype(row,col,sheet) == :date
140
- yyyy,mm,dd = @cell[sheet][[row,col]].split('-')
141
- return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
142
- elsif celltype(row,col,sheet) == :datetime
143
- date_part,time_part = @cell[sheet][[row,col]].split(' ')
144
- yyyy,mm,dd = date_part.split('-')
145
- hh,mi,ss = time_part.split(':')
146
- return DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i)
147
- end
148
- @cell[sheet][[row,col]]
149
- end
150
-
151
- # Returns the formula at (row,col).
152
- # Returns nil if there is no formula.
153
- # The method #formula? checks if there is a formula.
154
- def formula(row,col,sheet=nil)
155
- sheet ||= @default_sheet
156
- read_cells(sheet)
157
- row,col = normalize(row,col)
158
- @formula[sheet][[row,col]] && @formula[sheet][[row,col]]
159
- end
160
- alias_method :formula?, :formula
161
-
162
- # returns each formula in the selected sheet as an array of elements
163
- # [row, col, formula]
164
- def formulas(sheet=nil)
165
- sheet ||= @default_sheet
166
- read_cells(sheet)
167
- if @formula[sheet]
168
- @formula[sheet].each.collect do |elem|
169
- [elem[0][0], elem[0][1], elem[1]]
86
+ def method_missing(method, *args)
87
+ if (label = workbook.defined_names[method.to_s])
88
+ safe_send(sheet_for(label.sheet).cells[label.key], :value)
89
+ else
90
+ # call super for methods like #a1
91
+ super
170
92
  end
171
- else
172
- []
173
93
  end
174
- end
175
94
 
176
- class Font
177
- attr_accessor :bold, :italic, :underline
95
+ def sheets
96
+ @sheet_names
97
+ end
178
98
 
179
- def bold?
180
- @bold == true
99
+ def sheet_for(sheet)
100
+ sheet ||= default_sheet
101
+ validate_sheet!(sheet)
102
+ @sheets_by_name[sheet] || @sheets[sheet]
181
103
  end
182
104
 
183
- def italic?
184
- @italic == true
105
+ def images(sheet = nil)
106
+ images_names = sheet_for(sheet).images.map(&:last)
107
+ images_names.map { |iname| image_files.find { |ifile| ifile[iname] } }
185
108
  end
186
109
 
187
- def underline?
188
- @underline == true
110
+ # Returns the content of a spreadsheet-cell.
111
+ # (1,1) is the upper left corner.
112
+ # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
113
+ # cell at the first line and first row.
114
+ def cell(row, col, sheet = nil)
115
+ key = normalize(row, col)
116
+ safe_send(sheet_for(sheet).cells[key], :value)
189
117
  end
190
- end
191
118
 
192
- # Given a cell, return the cell's style
193
- def font(row, col, sheet=nil)
194
- sheet ||= @default_sheet
195
- read_cells(sheet)
196
- row,col = normalize(row,col)
197
- s_attribute = @s_attribute[sheet][[row,col]]
198
- s_attribute ||= 0
199
- s_attribute = s_attribute.to_i
200
- @style_definitions[s_attribute]
201
- end
119
+ def row(rownumber, sheet = nil)
120
+ sheet_for(sheet).row(rownumber)
121
+ end
202
122
 
203
- # returns the type of a cell:
204
- # * :float
205
- # * :string,
206
- # * :date
207
- # * :percentage
208
- # * :formula
209
- # * :time
210
- # * :datetime
211
- def celltype(row,col,sheet=nil)
212
- sheet ||= @default_sheet
213
- read_cells(sheet)
214
- row,col = normalize(row,col)
215
- if @formula[sheet][[row,col]]
216
- return :formula
217
- else
218
- @cell_type[sheet][[row,col]]
123
+ # returns all values in this column as an array
124
+ # column numbers are 1,2,3,... like in the spreadsheet
125
+ def column(column_number, sheet = nil)
126
+ if column_number.is_a?(::String)
127
+ column_number = ::Roo::Utils.letter_to_number(column_number)
128
+ end
129
+ sheet_for(sheet).column(column_number)
219
130
  end
220
- end
221
131
 
222
- # returns the internal type of an excel cell
223
- # * :numeric_or_formula
224
- # * :string
225
- # Note: this is only available within the Excelx class
226
- def excelx_type(row,col,sheet=nil)
227
- sheet ||= @default_sheet
228
- read_cells(sheet)
229
- row,col = normalize(row,col)
230
- return @excelx_type[sheet][[row,col]]
231
- end
132
+ # returns the number of the first non-empty row
133
+ def first_row(sheet = nil)
134
+ sheet_for(sheet).first_row
135
+ end
232
136
 
233
- # returns the internal value of an excelx cell
234
- # Note: this is only available within the Excelx class
235
- def excelx_value(row,col,sheet=nil)
236
- sheet ||= @default_sheet
237
- read_cells(sheet)
238
- row,col = normalize(row,col)
239
- return @excelx_value[sheet][[row,col]]
240
- end
137
+ # returns the number of the last non-empty row
138
+ def last_row(sheet = nil)
139
+ sheet_for(sheet).last_row
140
+ end
241
141
 
242
- # returns the internal format of an excel cell
243
- def excelx_format(row,col,sheet=nil)
244
- sheet ||= @default_sheet
245
- read_cells(sheet)
246
- row,col = normalize(row,col)
247
- s = @s_attribute[sheet][[row,col]]
248
- attribute2format(s).to_s
249
- end
142
+ # returns the number of the first non-empty column
143
+ def first_column(sheet = nil)
144
+ sheet_for(sheet).first_column
145
+ end
250
146
 
251
- # returns an array of sheet names in the spreadsheet
252
- def sheets
253
- @workbook_doc.xpath("//xmlns:sheet").map do |sheet|
254
- sheet['name']
147
+ # returns the number of the last non-empty column
148
+ def last_column(sheet = nil)
149
+ sheet_for(sheet).last_column
255
150
  end
256
- end
257
151
 
258
- # shows the internal representation of all cells
259
- # for debugging purposes
260
- def to_s(sheet=nil)
261
- sheet ||= @default_sheet
262
- read_cells(sheet)
263
- @cell[sheet].inspect
264
- end
152
+ # set a cell to a certain value
153
+ # (this will not be saved back to the spreadsheet file!)
154
+ def set(row, col, value, sheet = nil) #:nodoc:
155
+ key = normalize(row, col)
156
+ cell_type = cell_type_by_value(value)
157
+ sheet_for(sheet).cells[key] = Cell.new(value, cell_type, nil, cell_type, value, nil, nil, nil, Coordinate.new(row, col))
158
+ end
265
159
 
266
- # returns the row,col values of the labelled cell
267
- # (nil,nil) if label is not defined
268
- def label(labelname)
269
- read_labels
270
- if @label.empty? || !@label.has_key?(labelname)
271
- return nil,nil,nil
272
- else
273
- return @label[labelname][1].to_i,
274
- Roo::Base.letter_to_number(@label[labelname][2]),
275
- @label[labelname][0]
160
+ # Returns the formula at (row,col).
161
+ # Returns nil if there is no formula.
162
+ # The method #formula? checks if there is a formula.
163
+ def formula(row, col, sheet = nil)
164
+ key = normalize(row, col)
165
+ safe_send(sheet_for(sheet).cells[key], :formula)
276
166
  end
277
- end
278
167
 
279
- # Returns an array which all labels. Each element is an array with
280
- # [labelname, [row,col,sheetname]]
281
- def labels
282
- # sheet ||= @default_sheet
283
- # read_cells(sheet)
284
- read_labels
285
- @label.map do |label|
286
- [ label[0], # name
287
- [ label[1][1].to_i, # row
288
- Roo::Base.letter_to_number(label[1][2]), # column
289
- label[1][0], # sheet
290
- ] ]
168
+ # Predicate methods really should return a boolean
169
+ # value. Hopefully no one was relying on the fact that this
170
+ # previously returned either nil/formula
171
+ def formula?(*args)
172
+ !!formula(*args)
291
173
  end
292
- end
293
174
 
294
- def hyperlink?(row,col,sheet=nil)
295
- hyperlink(row, col, sheet) != nil
296
- end
175
+ # returns each formula in the selected sheet as an array of tuples in following format
176
+ # [[row, col, formula], [row, col, formula],...]
177
+ def formulas(sheet = nil)
178
+ sheet_for(sheet).cells.select { |_, cell| cell.formula }.map do |(x, y), cell|
179
+ [x, y, cell.formula]
180
+ end
181
+ end
297
182
 
298
- # returns the hyperlink at (row/col)
299
- # nil if there is no hyperlink
300
- def hyperlink(row,col,sheet=nil)
301
- sheet ||= @default_sheet
302
- read_hyperlinks(sheet) unless @hyperlinks_read[sheet]
303
- row,col = normalize(row,col)
304
- return nil unless @hyperlink[sheet]
305
- @hyperlink[sheet][[row,col]]
306
- end
183
+ # Given a cell, return the cell's style
184
+ def font(row, col, sheet = nil)
185
+ key = normalize(row, col)
186
+ definition_index = safe_send(sheet_for(sheet).cells[key], :style)
187
+ styles.definitions[definition_index] if definition_index
188
+ end
307
189
 
308
- # returns the comment at (row/col)
309
- # nil if there is no comment
310
- def comment(row,col,sheet=nil)
311
- sheet ||= @default_sheet
312
- #read_cells(sheet)
313
- read_comments(sheet) unless @comments_read[sheet]
314
- row,col = normalize(row,col)
315
- return nil unless @comment[sheet]
316
- @comment[sheet][[row,col]]
317
- end
190
+ # returns the type of a cell:
191
+ # * :float
192
+ # * :string,
193
+ # * :date
194
+ # * :percentage
195
+ # * :formula
196
+ # * :time
197
+ # * :datetime
198
+ def celltype(row, col, sheet = nil)
199
+ key = normalize(row, col)
200
+ safe_send(sheet_for(sheet).cells[key], :type)
201
+ end
318
202
 
319
- # true, if there is a comment
320
- def comment?(row,col,sheet=nil)
321
- sheet ||= @default_sheet
322
- # read_cells(sheet)
323
- read_comments(sheet) unless @comments_read[sheet]
324
- row,col = normalize(row,col)
325
- comment(row,col) != nil
326
- end
203
+ # returns the internal type of an excel cell
204
+ # * :numeric_or_formula
205
+ # * :string
206
+ # Note: this is only available within the Excelx class
207
+ def excelx_type(row, col, sheet = nil)
208
+ key = normalize(row, col)
209
+ safe_send(sheet_for(sheet).cells[key], :cell_type)
210
+ end
327
211
 
328
- # returns each comment in the selected sheet as an array of elements
329
- # [row, col, comment]
330
- def comments(sheet=nil)
331
- sheet ||= @default_sheet
332
- read_comments(sheet) unless @comments_read[sheet]
333
- if @comment[sheet]
334
- @comment[sheet].each.collect do |elem|
335
- [elem[0][0],elem[0][1],elem[1]]
336
- end
337
- else
338
- []
212
+ # returns the internal value of an excelx cell
213
+ # Note: this is only available within the Excelx class
214
+ def excelx_value(row, col, sheet = nil)
215
+ key = normalize(row, col)
216
+ safe_send(sheet_for(sheet).cells[key], :cell_value)
339
217
  end
340
- end
341
218
 
342
- private
219
+ # returns the internal value of an excelx cell
220
+ # Note: this is only available within the Excelx class
221
+ def formatted_value(row, col, sheet = nil)
222
+ key = normalize(row, col)
223
+ safe_send(sheet_for(sheet).cells[key], :formatted_value)
224
+ end
343
225
 
344
- def load_xmls(paths)
345
- paths.compact.map do |item|
346
- load_xml(item)
226
+ # returns the internal format of an excel cell
227
+ def excelx_format(row, col, sheet = nil)
228
+ key = normalize(row, col)
229
+ sheet_for(sheet).excelx_format(key)
347
230
  end
348
- end
349
231
 
350
- # helper function to set the internal representation of cells
351
- def set_cell_values(sheet,x,y,i,v,value_type,formula,
352
- excelx_type=nil,
353
- excelx_value=nil,
354
- s_attribute=nil)
355
- key = [y,x+i]
356
- @cell_type[sheet] ||= {}
357
- @cell_type[sheet][key] = value_type
358
- @formula[sheet] ||= {}
359
- @formula[sheet][key] = formula if formula
360
- @cell[sheet] ||= {}
361
- @cell[sheet][key] =
362
- case @cell_type[sheet][key]
363
- when :float
364
- v.to_f
365
- when :string
366
- v
367
- when :date
368
- (base_date+v.to_i).strftime("%Y-%m-%d")
369
- when :datetime
370
- (base_date+v.to_f).strftime("%Y-%m-%d %H:%M:%S")
371
- when :percentage
372
- v.to_f
373
- when :time
374
- v.to_f*(24*60*60)
375
- else
376
- v
377
- end
232
+ def empty?(row, col, sheet = nil)
233
+ sheet = sheet_for(sheet)
234
+ key = normalize(row, col)
235
+ cell = sheet.cells[key]
236
+ !cell || cell.empty? ||
237
+ (row < sheet.first_row || row > sheet.last_row || col < sheet.first_column || col > sheet.last_column)
238
+ end
378
239
 
379
- @cell[sheet][key] = Spreadsheet::Link.new(@hyperlink[sheet][key], @cell[sheet][key].to_s) if hyperlink?(y,x+i)
380
- @excelx_type[sheet] ||= {}
381
- @excelx_type[sheet][key] = excelx_type
382
- @excelx_value[sheet] ||= {}
383
- @excelx_value[sheet][key] = excelx_value
384
- @s_attribute[sheet] ||= {}
385
- @s_attribute[sheet][key] = s_attribute
386
- end
240
+ # shows the internal representation of all cells
241
+ # for debugging purposes
242
+ def to_s(sheet = nil)
243
+ sheet_for(sheet).cells.inspect
244
+ end
387
245
 
388
- # read all cells in the selected sheet
389
- def read_cells(sheet=nil)
390
- sheet ||= @default_sheet
391
- validate_sheet!(sheet)
392
- return if @cells_read[sheet]
393
-
394
- @sheet_doc[sheets.index(sheet)].xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row/xmlns:c").each do |c|
395
- s_attribute = c['s'].to_i # should be here
396
- # c: <c r="A5" s="2">
397
- # <v>22606</v>
398
- # </c>, format: , tmp_type: float
399
- value_type =
400
- case c['t']
401
- when 's'
402
- :shared
403
- when 'b'
404
- :boolean
405
- # 2011-02-25 BEGIN
406
- when 'str'
407
- :string
408
- # 2011-02-25 END
409
- # 2011-09-15 BEGIN
410
- when 'inlineStr'
411
- :inlinestr
412
- # 2011-09-15 END
413
- else
414
- format = attribute2format(s_attribute)
415
- Format.to_type(format)
416
- end
417
- formula = nil
418
- c.children.each do |cell|
419
- case cell.name
420
- when 'is'
421
- cell.children.each do |is|
422
- if is.name == 't'
423
- inlinestr_content = is.content
424
- value_type = :string
425
- v = inlinestr_content
426
- excelx_type = :string
427
- y, x = Roo::Base.split_coordinate(c['r'])
428
- excelx_value = inlinestr_content #cell.content
429
- set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute)
430
- end
431
- end
432
- when 'f'
433
- formula = cell.content
434
- when 'v'
435
- if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0
436
- value_type =
437
- if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
438
- :datetime
439
- else
440
- :date
441
- end
442
- end
443
- excelx_type = [:numeric_or_formula,format.to_s]
444
- excelx_value = cell.content
445
- v =
446
- case value_type
447
- when :shared
448
- value_type = :string
449
- excelx_type = :string
450
- @shared_table[cell.content.to_i]
451
- when :boolean
452
- (cell.content.to_i == 1 ? 'TRUE' : 'FALSE')
453
- when :date
454
- cell.content
455
- when :time
456
- cell.content
457
- when :datetime
458
- cell.content
459
- when :formula
460
- cell.content.to_f #TODO: !!!!
461
- when :string
462
- excelx_type = :string
463
- cell.content
464
- else
465
- value_type = :float
466
- cell.content
467
- end
468
- y, x = Roo::Base.split_coordinate(c['r'])
469
- set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute)
470
- end
246
+ # returns the row, col and sheet of the labelled cell
247
+ # ([nil, nil, nil] if the label is not defined)
248
+ def label(name)
249
+ labels = workbook.defined_names
250
+ return [nil, nil, nil] if labels.empty? || !labels.key?(name)
251
+
252
+ [labels[name].row, labels[name].col, labels[name].sheet]
253
+ end
254
+
255
+ # Returns an array with all labels. Each element is an array with
256
+ # [labelname, [row,col,sheetname]]
257
+ def labels
258
+ @labels ||= workbook.defined_names.map do |name, label|
259
+ [
260
+ name,
261
+ [label.row, label.col, label.sheet]
262
+ ]
471
263
  end
472
264
  end
473
- @cells_read[sheet] = true
474
- # begin comments
475
- =begin
476
- Datei xl/comments1.xml
477
- <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
478
- <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
479
- <authors>
480
- <author />
481
- </authors>
482
- <commentList>
483
- <comment ref="B4" authorId="0">
484
- <text>
485
- <r>
486
- <rPr>
487
- <sz val="10" />
488
- <rFont val="Arial" />
489
- <family val="2" />
490
- </rPr>
491
- <t>Kommentar fuer B4</t>
492
- </r>
493
- </text>
494
- </comment>
495
- <comment ref="B5" authorId="0">
496
- <text>
497
- <r>
498
- <rPr>
499
- <sz val="10" />
500
- <rFont val="Arial" />
501
- <family val="2" />
502
- </rPr>
503
- <t>Kommentar fuer B5</t>
504
- </r>
505
- </text>
506
- </comment>
507
- </commentList>
508
- </comments>
509
- =end
510
- =begin
511
- if @comments_doc[self.sheets.index(sheet)]
512
- read_comments(sheet)
513
- end
514
- =end
515
- #end comments
516
- end
517
265
 
518
- # Reads all comments from a sheet
519
- def read_comments(sheet=nil)
520
- sheet ||= @default_sheet
521
- validate_sheet!(sheet)
522
- n = self.sheets.index(sheet)
523
- return unless @comments_doc[n] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
524
- @comments_doc[n].xpath("//xmlns:comments/xmlns:commentList/xmlns:comment").each do |comment|
525
- ref = comment.attributes['ref'].to_s
526
- row,col = Roo::Base.split_coordinate(ref)
527
- comment.xpath('./xmlns:text/xmlns:r/xmlns:t').each do |text|
528
- @comment[sheet] ||= {}
529
- @comment[sheet][[row,col]] = text.text
266
+ def hyperlink?(row, col, sheet = nil)
267
+ !!hyperlink(row, col, sheet)
268
+ end
269
+
270
+ # returns the hyperlink at (row/col)
271
+ # nil if there is no hyperlink
272
+ def hyperlink(row, col, sheet = nil)
273
+ key = normalize(row, col)
274
+ sheet_for(sheet).hyperlinks[key]
275
+ end
276
+
277
+ # returns the comment at (row/col)
278
+ # nil if there is no comment
279
+ def comment(row, col, sheet = nil)
280
+ key = normalize(row, col)
281
+ sheet_for(sheet).comments[key]
282
+ end
283
+
284
+ # true, if there is a comment
285
+ def comment?(row, col, sheet = nil)
286
+ !!comment(row, col, sheet)
287
+ end
288
+
289
+ def comments(sheet = nil)
290
+ sheet_for(sheet).comments.map do |(x, y), comment|
291
+ [x, y, comment]
530
292
  end
531
293
  end
532
- @comments_read[sheet] = true
533
- end
534
294
 
535
- # Reads all hyperlinks from a sheet
536
- def read_hyperlinks(sheet=nil)
537
- sheet ||= @default_sheet
538
- validate_sheet!(sheet)
539
- n = self.sheets.index(sheet)
540
- if rels_doc = @rels_doc[n]
541
- rels = Hash[rels_doc.xpath("/xmlns:Relationships/xmlns:Relationship").map do |r|
542
- [r.attribute('Id').text, r]
543
- end]
544
- @sheet_doc[n].xpath("/xmlns:worksheet/xmlns:hyperlinks/xmlns:hyperlink").each do |h|
545
- if rel_element = rels[h.attribute('id').text]
546
- row,col = Roo::Base.split_coordinate(h.attributes['ref'].to_s)
547
- @hyperlink[sheet] ||= {}
548
- @hyperlink[sheet][[row,col]] = rel_element.attribute('Target').text
549
- end
295
+ # Yields each row as an array of Excelx::Cell
296
+ # Takes options for sheet, pad_cells, and max_rows; returns an Enumerator when no block is given
297
+ def each_row_streaming(options = {})
298
+ sheet = sheet_for(options.delete(:sheet))
299
+ if block_given?
300
+ sheet.each_row(options) { |row| yield row }
301
+ else
302
+ sheet.to_enum(:each_row, options)
550
303
  end
551
304
  end
552
- @hyperlinks_read[sheet] = true
553
- end
554
305
 
555
- def read_labels
556
- @label ||= Hash[@workbook_doc.xpath("//xmlns:definedName").map do |defined_name|
557
- # "Sheet1!$C$5"
558
- sheet, coordinates = defined_name.text.split('!$', 2)
559
- col,row = coordinates.split('$')
560
- [defined_name['name'], [sheet,row,col]]
561
- end]
562
- end
306
+ private
563
307
 
564
- # Extracts all needed files from the zip file
565
- def process_zipfile(tmpdir, zipfilename, zip, path='')
566
- @sheet_files = []
567
- Roo::ZipFile.open(zipfilename) {|zf|
568
- zf.entries.each {|entry|
569
- entry_name = entry.to_s.downcase
308
+ def clean_sheet(sheet)
309
+ @sheets_by_name[sheet].cells.each_pair do |coord, value|
310
+ next unless value.value.is_a?(::String)
570
311
 
571
- path =
572
- if entry_name.end_with?('workbook.xml')
573
- "#{tmpdir}/roo_workbook.xml"
574
- elsif entry_name.end_with?('sharedstrings.xml')
575
- "#{tmpdir}/roo_sharedStrings.xml"
576
- elsif entry_name.end_with?('styles.xml')
577
- "#{tmpdir}/roo_styles.xml"
578
- elsif entry_name =~ /sheet([0-9]+).xml$/
579
- nr = $1
580
- @sheet_files[nr.to_i-1] = "#{tmpdir}/roo_sheet#{nr}"
581
- elsif entry_name =~ /comments([0-9]+).xml$/
582
- nr = $1
583
- @comments_files[nr.to_i-1] = "#{tmpdir}/roo_comments#{nr}"
584
- elsif entry_name =~ /sheet([0-9]+).xml.rels$/
585
- nr = $1
586
- @rels_files[nr.to_i-1] = "#{tmpdir}/roo_rels#{nr}"
587
- end
588
- if path
589
- extract_file(zip, entry, path)
590
- end
591
- }
592
- }
593
- end
312
+ @sheets_by_name[sheet].cells[coord].value = sanitize_value(value.value)
313
+ end
594
314
 
595
- def extract_file(source_zip, entry, destination_path)
596
- open(destination_path,'wb') {|f|
597
- f << source_zip.read(entry)
598
- }
599
- end
315
+ @cleaned[sheet] = true
316
+ end
600
317
 
601
- # extract files from the zip file
602
- def extract_content(tmpdir, zipfilename)
603
- Roo::ZipFile.open(@filename) do |zip|
604
- process_zipfile(tmpdir, zipfilename,zip)
318
+ # Internal: extracts the worksheet_ids from the workbook.xml file. xlsx
319
+ # documents require a workbook.xml file, so if the file is missing
320
+ # it is not a valid xlsx file. In these cases, an ArgumentError is
321
+ # meant to be raised. NOTE(review): the `fail ArgumentError '...'` call below has no comma after ArgumentError, so Ruby raises NoMethodError instead.
322
+ #
323
+ # entries - An Array of Zip::Entry objects; the workbook.xml entry is looked up in it.
324
+ # path - A String for Zip::Entry's destination path.
325
+ #
326
+ # Examples
327
+ #
328
+ # extract_worksheet_ids(<Array of Zip::Entry>, 'tmpdir/roo_workbook.xml')
329
+ # # => ["rId1", "rId2", "rId3"]
330
+ #
331
+ # Returns an Array of Strings.
332
+ def extract_worksheet_ids(entries, path)
333
+ wb = entries.find { |e| e.name[/workbook.xml$/] }
334
+ fail ArgumentError 'missing required workbook file' if wb.nil?
335
+
336
+ wb.extract(path)
337
+ workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
338
+ workbook_doc.xpath('//sheet').map { |s| s['id'] }
605
339
  end
606
- end
607
340
 
608
- # read the shared strings xml document
609
- def read_shared_strings(doc)
610
- doc.xpath("/xmlns:sst/xmlns:si").each do |si|
611
- shared_table_entry = ''
612
- si.children.each do |elem|
613
- if elem.name == 'r' and elem.children
614
- elem.children.each do |r_elem|
615
- if r_elem.name == 't'
616
- shared_table_entry << r_elem.content
617
- end
618
- end
619
- end
620
- if elem.name == 't'
621
- shared_table_entry = elem.content
622
- end
341
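+ # Internal: maps the relationship ids of worksheets to their targets, as listed in workbook.xml.rels.
342
+ #
343
+ # entries - An Array of Zip::Entry objects; the workbook.xml.rels entry is looked up in it.
344
+ # path - A String for the Zip::Entry's destination path.
345
+ #
346
+ # Examples
347
+ #
348
+ # extract_worksheet_rels(<Array of Zip::Entry>, 'tmpdir/roo_workbook.xml.rels')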
349
+ # # => {
350
+ # "rId1"=>"worksheets/sheet1.xml",
351
+ # "rId2"=>"worksheets/sheet2.xml",
352
+ # "rId3"=>"worksheets/sheet3.xml"
353
+ # }
354
+ #
355
+ # Returns a Hash.
356
+ def extract_worksheet_rels(entries, path)
357
+ wb_rels = entries.find { |e| e.name[/workbook.xml.rels$/] }
358
+ fail ArgumentError 'missing required workbook file' if wb_rels.nil?
359
+
360
+ wb_rels.extract(path)
361
+ rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
362
+
363
+ relationships = rels_doc.xpath('//Relationship').select do |relationship|
364
+ worksheet_types.include? relationship['Type']
365
+ end
366
+
367
+ relationships.each_with_object({}) do |relationship, hash|
368
+ hash[relationship['Id']] = relationship['Target']
623
369
  end
624
- @shared_table << shared_table_entry
625
370
  end
626
- end
627
371
 
628
- # read the styles elements of an excelx document
629
- def read_styles(doc)
630
- @cellXfs = []
631
-
632
- @numFmts = Hash[doc.xpath("//xmlns:numFmt").map do |numFmt|
633
- [numFmt['numFmtId'], numFmt['formatCode']]
634
- end]
635
- fonts = doc.xpath("//xmlns:fonts/xmlns:font").map do |font_el|
636
- Font.new.tap do |font|
637
- font.bold = !font_el.xpath('./xmlns:b').empty?
638
- font.italic = !font_el.xpath('./xmlns:i').empty?
639
- font.underline = !font_el.xpath('./xmlns:u').empty?
372
+ # Extracts the sheets in order, but it will ignore sheets that are not
373
+ # worksheets.
374
+ def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
375
+ (sheet_ids & sheets.keys).each_with_index do |id, i|
376
+ name = sheets[id]
377
+ entry = entries.find { |e| "/#{e.name}" =~ /#{name}$/ }
378
+ path = "#{tmpdir}/roo_sheet#{i + 1}"
379
+ sheet_files << path
380
+ @sheet_files << path
381
+ entry.extract(path)
640
382
  end
641
383
  end
642
384
 
643
- doc.xpath("//xmlns:cellXfs").each do |xfs|
644
- xfs.children.each do |xf|
645
- @cellXfs << xf['numFmtId']
646
- @style_definitions << fonts[xf['fontId'].to_i]
385
+ def extract_images(entries, tmpdir)
386
+ img_entries = entries.select { |e| e.name[/media\/image([0-9]+)/] }
387
+ img_entries.each do |entry|
388
+ path = "#{@tmpdir}/roo#{entry.name.gsub(/xl\/|\//, "_")}"
389
+ image_files << path
390
+ entry.extract(path)
647
391
  end
648
392
  end
649
- end
650
393
 
651
- # convert internal excelx attribute to a format
652
- def attribute2format(s)
653
- id = @cellXfs[s.to_i]
654
- @numFmts[id] || Format::STANDARD_FORMATS[id.to_i]
655
- end
394
+ # Extracts all needed files from the zip file
395
+ def process_zipfile(zipfilename_or_stream)
396
+ @sheet_files = []
656
397
 
657
- def base_date
658
- @base_date ||= read_base_date
659
- end
398
+ unless is_stream?(zipfilename_or_stream)
399
+ zip_file = Zip::File.open(zipfilename_or_stream)
400
+ else
401
+ zip_file = Zip::CentralDirectory.new
402
+ zip_file.read_from_stream zipfilename_or_stream
403
+ end
404
+
405
+ process_zipfile_entries zip_file.to_a.sort_by(&:name)
406
+ end
407
+
408
+ def process_zipfile_entries(entries)
409
+ # NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames
410
+ # are not in order. With Numbers 3.1, the first sheet is always
411
+ # sheet.xml, not sheet1.xml. With Google, the order of the worksheets is
412
+ # independent of a worksheet's filename (i.e. sheet6.xml can be the
413
+ # first worksheet).
414
+ #
415
+ # workbook.xml lists the correct order of worksheets and
416
+ # workbook.xml.rels lists the filenames for those worksheets.
417
+ #
418
+ # workbook.xml:
419
+ # <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/>
420
+ # <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/>
421
+ # workbook.xml.rels:
422
+ # <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
423
+ # <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
424
+ sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
425
+ sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
426
+ extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)
427
+ extract_images(entries, @tmpdir)
428
+
429
+ entries.each do |entry|
430
+ path =
431
+ case entry.name.downcase
432
+ when /richdata/
433
+ # FIXME: Ignore richData as parsing is not implemented yet and can cause
434
+ # Zip::DestinationFileExistsError when including a second "styles.xml" entry
435
+ # see http://schemas.microsoft.com/office/spreadsheetml/2017/richdata2
436
+ nil
437
+ when /sharedstrings.xml$/
438
+ "#{@tmpdir}/roo_sharedStrings.xml"
439
+ when /styles.xml$/
440
+ "#{@tmpdir}/roo_styles.xml"
441
+ when /comments([0-9]+).xml$/
442
+ # FIXME: Most of the time, the order of the comment files is the same as
443
+ # the sheet order, i.e. sheet1.xml's comments are in comments1.xml.
444
+ # In some situations, this isn't true. The true location of a
445
+ # sheet's comment file is in the sheet1.xml.rels file. SEE
446
+ # ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
447
+ nr = Regexp.last_match[1].to_i
448
+ comments_files[nr - 1] = "#{@tmpdir}/roo_comments#{nr}"
449
+ when %r{chartsheets/_rels/sheet([0-9]+).xml.rels$}
450
+ # NOTE: Chart sheet relationship files were interfering with
451
+ # worksheets.
452
+ nil
453
+ when /sheet([0-9]+).xml.rels$/
454
+ # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
455
+ # it also stores the location for sharedStrings, comments,
456
+ # drawings, etc.
457
+ nr = Regexp.last_match[1].to_i
458
+ rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}"
459
+ when /drawing([0-9]+).xml.rels$/
460
+ # Extracting drawing relationships to make images lists for each sheet
461
+ nr = Regexp.last_match[1].to_i
462
+ image_rels[nr - 1] = "#{@tmpdir}/roo_image_rels#{nr}"
463
+ end
660
464
 
661
- # Default to 1900 (minus one day due to excel quirk) but use 1904 if
662
- # it's set in the Workbook's workbookPr
663
- # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
664
- def read_base_date
665
- base_date = Date.new(1899,12,30)
666
- @workbook_doc.xpath("//xmlns:workbookPr").map do |workbookPr|
667
- if workbookPr["date1904"] && workbookPr["date1904"] =~ /true|1/i
668
- base_date = Date.new(1904,01,01)
465
+ entry.extract(path) if path
669
466
  end
670
467
  end
671
- base_date
672
- end
673
468
 
674
- end # class
469
+ def safe_send(object, method, *args)
470
+ object.send(method, *args) if object&.respond_to?(method)
471
+ end
472
+
473
+ def worksheet_types
474
+ [
475
+ 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet', # OOXML Transitional
476
+ 'http://purl.oclc.org/ooxml/officeDocument/relationships/worksheet' # OOXML Strict
477
+ ]
478
+ end
479
+ end
480
+ end