roo 1.13.2 → 2.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. checksums.yaml +5 -5
  2. data/.codeclimate.yml +17 -0
  3. data/.github/issue_template.md +16 -0
  4. data/.github/pull_request_template.md +14 -0
  5. data/.github/workflows/pull-request.yml +15 -0
  6. data/.github/workflows/ruby.yml +34 -0
  7. data/.gitignore +11 -0
  8. data/.rubocop.yml +186 -0
  9. data/.simplecov +4 -0
  10. data/CHANGELOG.md +702 -0
  11. data/Gemfile +18 -12
  12. data/Guardfile +23 -0
  13. data/LICENSE +5 -1
  14. data/README.md +328 -0
  15. data/Rakefile +23 -23
  16. data/examples/roo_soap_client.rb +28 -31
  17. data/examples/roo_soap_server.rb +4 -6
  18. data/examples/write_me.rb +9 -10
  19. data/lib/roo/base.rb +317 -504
  20. data/lib/roo/constants.rb +7 -0
  21. data/lib/roo/csv.rb +141 -113
  22. data/lib/roo/errors.rb +11 -0
  23. data/lib/roo/excelx/cell/base.rb +108 -0
  24. data/lib/roo/excelx/cell/boolean.rb +30 -0
  25. data/lib/roo/excelx/cell/date.rb +28 -0
  26. data/lib/roo/excelx/cell/datetime.rb +107 -0
  27. data/lib/roo/excelx/cell/empty.rb +20 -0
  28. data/lib/roo/excelx/cell/number.rb +99 -0
  29. data/lib/roo/excelx/cell/string.rb +19 -0
  30. data/lib/roo/excelx/cell/time.rb +44 -0
  31. data/lib/roo/excelx/cell.rb +110 -0
  32. data/lib/roo/excelx/comments.rb +55 -0
  33. data/lib/roo/excelx/coordinate.rb +19 -0
  34. data/lib/roo/excelx/extractor.rb +39 -0
  35. data/lib/roo/excelx/format.rb +71 -0
  36. data/lib/roo/excelx/images.rb +26 -0
  37. data/lib/roo/excelx/relationships.rb +33 -0
  38. data/lib/roo/excelx/shared.rb +39 -0
  39. data/lib/roo/excelx/shared_strings.rb +151 -0
  40. data/lib/roo/excelx/sheet.rb +151 -0
  41. data/lib/roo/excelx/sheet_doc.rb +257 -0
  42. data/lib/roo/excelx/styles.rb +64 -0
  43. data/lib/roo/excelx/workbook.rb +64 -0
  44. data/lib/roo/excelx.rb +407 -601
  45. data/lib/roo/font.rb +17 -0
  46. data/lib/roo/formatters/base.rb +15 -0
  47. data/lib/roo/formatters/csv.rb +84 -0
  48. data/lib/roo/formatters/matrix.rb +23 -0
  49. data/lib/roo/formatters/xml.rb +31 -0
  50. data/lib/roo/formatters/yaml.rb +40 -0
  51. data/lib/roo/helpers/default_attr_reader.rb +20 -0
  52. data/lib/roo/helpers/weak_instance_cache.rb +41 -0
  53. data/lib/roo/libre_office.rb +4 -0
  54. data/lib/roo/link.rb +34 -0
  55. data/lib/roo/open_office.rb +631 -0
  56. data/lib/roo/spreadsheet.rb +28 -23
  57. data/lib/roo/tempdir.rb +24 -0
  58. data/lib/roo/utils.rb +128 -0
  59. data/lib/roo/version.rb +3 -0
  60. data/lib/roo.rb +26 -24
  61. data/roo.gemspec +29 -203
  62. data/spec/helpers.rb +5 -0
  63. data/spec/lib/roo/base_spec.rb +291 -3
  64. data/spec/lib/roo/csv_spec.rb +38 -11
  65. data/spec/lib/roo/excelx/cell/time_spec.rb +15 -0
  66. data/spec/lib/roo/excelx/format_spec.rb +7 -6
  67. data/spec/lib/roo/excelx/relationships_spec.rb +43 -0
  68. data/spec/lib/roo/excelx/sheet_doc_spec.rb +11 -0
  69. data/spec/lib/roo/excelx_spec.rb +672 -11
  70. data/spec/lib/roo/libreoffice_spec.rb +16 -6
  71. data/spec/lib/roo/openoffice_spec.rb +30 -8
  72. data/spec/lib/roo/spreadsheet_spec.rb +60 -12
  73. data/spec/lib/roo/strict_spec.rb +43 -0
  74. data/spec/lib/roo/utils_spec.rb +119 -0
  75. data/spec/lib/roo/weak_instance_cache_spec.rb +92 -0
  76. data/spec/lib/roo_spec.rb +0 -0
  77. data/spec/spec_helper.rb +7 -6
  78. data/test/all_ss.rb +12 -11
  79. data/test/excelx/cell/test_attr_reader_default.rb +72 -0
  80. data/test/excelx/cell/test_base.rb +68 -0
  81. data/test/excelx/cell/test_boolean.rb +36 -0
  82. data/test/excelx/cell/test_date.rb +38 -0
  83. data/test/excelx/cell/test_datetime.rb +45 -0
  84. data/test/excelx/cell/test_empty.rb +18 -0
  85. data/test/excelx/cell/test_number.rb +90 -0
  86. data/test/excelx/cell/test_string.rb +48 -0
  87. data/test/excelx/cell/test_time.rb +30 -0
  88. data/test/excelx/test_coordinate.rb +51 -0
  89. data/test/formatters/test_csv.rb +136 -0
  90. data/test/formatters/test_matrix.rb +76 -0
  91. data/test/formatters/test_xml.rb +78 -0
  92. data/test/formatters/test_yaml.rb +20 -0
  93. data/test/helpers/test_accessing_files.rb +81 -0
  94. data/test/helpers/test_comments.rb +43 -0
  95. data/test/helpers/test_formulas.rb +9 -0
  96. data/test/helpers/test_labels.rb +103 -0
  97. data/test/helpers/test_sheets.rb +55 -0
  98. data/test/helpers/test_styles.rb +62 -0
  99. data/test/roo/test_base.rb +182 -0
  100. data/test/roo/test_csv.rb +88 -0
  101. data/test/roo/test_excelx.rb +360 -0
  102. data/test/roo/test_libre_office.rb +9 -0
  103. data/test/roo/test_open_office.rb +289 -0
  104. data/test/test_helper.rb +123 -59
  105. data/test/test_roo.rb +392 -2292
  106. metadata +153 -298
  107. data/CHANGELOG +0 -417
  108. data/Gemfile.lock +0 -78
  109. data/README.markdown +0 -126
  110. data/VERSION +0 -1
  111. data/lib/roo/excel.rb +0 -355
  112. data/lib/roo/excel2003xml.rb +0 -300
  113. data/lib/roo/google.rb +0 -292
  114. data/lib/roo/openoffice.rb +0 -496
  115. data/lib/roo/roo_rails_helper.rb +0 -83
  116. data/lib/roo/worksheet.rb +0 -18
  117. data/scripts/txt2html +0 -67
  118. data/spec/lib/roo/excel2003xml_spec.rb +0 -15
  119. data/spec/lib/roo/excel_spec.rb +0 -17
  120. data/spec/lib/roo/google_spec.rb +0 -64
  121. data/test/files/1900_base.xls +0 -0
  122. data/test/files/1900_base.xlsx +0 -0
  123. data/test/files/1904_base.xls +0 -0
  124. data/test/files/1904_base.xlsx +0 -0
  125. data/test/files/Bibelbund.csv +0 -3741
  126. data/test/files/Bibelbund.ods +0 -0
  127. data/test/files/Bibelbund.xls +0 -0
  128. data/test/files/Bibelbund.xlsx +0 -0
  129. data/test/files/Bibelbund.xml +0 -62518
  130. data/test/files/Bibelbund1.ods +0 -0
  131. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  132. data/test/files/bad_excel_date.xls +0 -0
  133. data/test/files/bbu.ods +0 -0
  134. data/test/files/bbu.xls +0 -0
  135. data/test/files/bbu.xlsx +0 -0
  136. data/test/files/bbu.xml +0 -152
  137. data/test/files/bode-v1.ods.zip +0 -0
  138. data/test/files/bode-v1.xls.zip +0 -0
  139. data/test/files/boolean.csv +0 -2
  140. data/test/files/boolean.ods +0 -0
  141. data/test/files/boolean.xls +0 -0
  142. data/test/files/boolean.xlsx +0 -0
  143. data/test/files/boolean.xml +0 -112
  144. data/test/files/borders.ods +0 -0
  145. data/test/files/borders.xls +0 -0
  146. data/test/files/borders.xlsx +0 -0
  147. data/test/files/borders.xml +0 -144
  148. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  149. data/test/files/bug-row-column-fixnum-float.xls +0 -0
  150. data/test/files/bug-row-column-fixnum-float.xml +0 -127
  151. data/test/files/comments.ods +0 -0
  152. data/test/files/comments.xls +0 -0
  153. data/test/files/comments.xlsx +0 -0
  154. data/test/files/csvtypes.csv +0 -1
  155. data/test/files/datetime.ods +0 -0
  156. data/test/files/datetime.xls +0 -0
  157. data/test/files/datetime.xlsx +0 -0
  158. data/test/files/datetime.xml +0 -142
  159. data/test/files/datetime_floatconv.xls +0 -0
  160. data/test/files/datetime_floatconv.xml +0 -148
  161. data/test/files/dreimalvier.ods +0 -0
  162. data/test/files/emptysheets.ods +0 -0
  163. data/test/files/emptysheets.xls +0 -0
  164. data/test/files/emptysheets.xlsx +0 -0
  165. data/test/files/emptysheets.xml +0 -105
  166. data/test/files/excel2003.xml +0 -21140
  167. data/test/files/false_encoding.xls +0 -0
  168. data/test/files/false_encoding.xml +0 -132
  169. data/test/files/file_item_error.xlsx +0 -0
  170. data/test/files/formula.ods +0 -0
  171. data/test/files/formula.xls +0 -0
  172. data/test/files/formula.xlsx +0 -0
  173. data/test/files/formula.xml +0 -134
  174. data/test/files/formula_parse_error.xls +0 -0
  175. data/test/files/formula_parse_error.xml +0 -1833
  176. data/test/files/formula_string_error.xlsx +0 -0
  177. data/test/files/html-escape.ods +0 -0
  178. data/test/files/link.xls +0 -0
  179. data/test/files/link.xlsx +0 -0
  180. data/test/files/matrix.ods +0 -0
  181. data/test/files/matrix.xls +0 -0
  182. data/test/files/named_cells.ods +0 -0
  183. data/test/files/named_cells.xls +0 -0
  184. data/test/files/named_cells.xlsx +0 -0
  185. data/test/files/no_spreadsheet_file.txt +0 -1
  186. data/test/files/numbers1.csv +0 -18
  187. data/test/files/numbers1.ods +0 -0
  188. data/test/files/numbers1.xls +0 -0
  189. data/test/files/numbers1.xlsx +0 -0
  190. data/test/files/numbers1.xml +0 -312
  191. data/test/files/numeric-link.xlsx +0 -0
  192. data/test/files/only_one_sheet.ods +0 -0
  193. data/test/files/only_one_sheet.xls +0 -0
  194. data/test/files/only_one_sheet.xlsx +0 -0
  195. data/test/files/only_one_sheet.xml +0 -67
  196. data/test/files/paragraph.ods +0 -0
  197. data/test/files/paragraph.xls +0 -0
  198. data/test/files/paragraph.xlsx +0 -0
  199. data/test/files/paragraph.xml +0 -127
  200. data/test/files/prova.xls +0 -0
  201. data/test/files/ric.ods +0 -0
  202. data/test/files/simple_spreadsheet.ods +0 -0
  203. data/test/files/simple_spreadsheet.xls +0 -0
  204. data/test/files/simple_spreadsheet.xlsx +0 -0
  205. data/test/files/simple_spreadsheet.xml +0 -225
  206. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  207. data/test/files/simple_spreadsheet_from_italo.xls +0 -0
  208. data/test/files/simple_spreadsheet_from_italo.xml +0 -242
  209. data/test/files/so_datetime.csv +0 -7
  210. data/test/files/style.ods +0 -0
  211. data/test/files/style.xls +0 -0
  212. data/test/files/style.xlsx +0 -0
  213. data/test/files/style.xml +0 -154
  214. data/test/files/time-test.csv +0 -2
  215. data/test/files/time-test.ods +0 -0
  216. data/test/files/time-test.xls +0 -0
  217. data/test/files/time-test.xlsx +0 -0
  218. data/test/files/time-test.xml +0 -131
  219. data/test/files/type_excel.ods +0 -0
  220. data/test/files/type_excel.xlsx +0 -0
  221. data/test/files/type_excelx.ods +0 -0
  222. data/test/files/type_excelx.xls +0 -0
  223. data/test/files/type_openoffice.xls +0 -0
  224. data/test/files/type_openoffice.xlsx +0 -0
  225. data/test/files/whitespace.ods +0 -0
  226. data/test/files/whitespace.xls +0 -0
  227. data/test/files/whitespace.xlsx +0 -0
  228. data/test/files/whitespace.xml +0 -184
  229. data/test/rm_sub_test.rb +0 -12
  230. data/test/rm_test.rb +0 -7
  231. data/test/test_generic_spreadsheet.rb +0 -259
  232. data/website/index.html +0 -385
  233. data/website/index.txt +0 -423
  234. data/website/javascripts/rounded_corners_lite.inc.js +0 -285
  235. data/website/stylesheets/screen.css +0 -130
  236. data/website/template.rhtml +0 -48
@@ -0,0 +1,151 @@
1
+ require 'forwardable'
2
+ module Roo
3
+ class Excelx
4
+ class Sheet
5
+ extend Forwardable
6
+
7
+ delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels] => :@shared
8
+
9
+ attr_reader :images
10
+
11
# Builds the helper objects for the sheet found at +sheet_index+.
#
# name        - the sheet name (stored as-is).
# shared      - object that styles, workbook, shared_strings, rels_files,
#               sheet_files, comments_files and image_rels are delegated to.
# sheet_index - index used to pick this sheet's entry from each of the
#               per-sheet lists exposed by +shared+.
# options     - Hash handed through unchanged to SheetDoc.new.
def initialize(name, shared, sheet_index, options = {})
  @name, @shared, @sheet_index = name, shared, sheet_index

  # The readers used below are delegated to @shared, so it has to be set first.
  @images = Images.new(image_rels[sheet_index]).list
  @rels = Relationships.new(rels_files[sheet_index])
  @comments = Comments.new(comments_files[sheet_index])
  @sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
end
20
+
21
# Returns the sheet's cells as reported by the underlying sheet document.
# The result is fetched once and then reused.
def cells
  return @cells if @cells

  @cells = @sheet.cells(@rels)
end
24
+
25
# Deprecated. Returns only the entries of #cells whose cell answers a truthy
# value to #presence. A deprecation warning is emitted the first time the
# result is computed; later calls reuse the memoised result.
def present_cells
  return @present_cells if @present_cells

  warn %{
[DEPRECATION] present_cells is deprecated. Alternate:
with activesupport => cells[key].presence
without activesupport => cells[key]&.presence
}
  @present_cells = cells.select { |_key, cell| cell&.presence }
end
35
+
36
# Streams the sheet row by row, yielding each row as an Array of
# Excelx::Cell objects (built by #cells_for_row_element).
#
# options - Hash, never modified by this method:
#           :offset    - Integer number of leading rows to skip (default 0).
#           :max_rows  - Integer; streaming stops once
#                        max_rows + offset + 1 rows have been visited
#                        (the extra 1 accounts for a header row).
#           :pad_cells - Boolean, forwarded to #cells_for_row_element.
# block   - receives one Array per yielded row. Without a block the rows are
#           still streamed but nothing is yielded.
#
# Returns whatever the underlying each_row_streaming call returns
# (nil when streaming is cut short by :max_rows).
def each_row(options = {}, &block)
  # Read the defaults into locals instead of writing them back into the
  # caller's hash: the hash may be shared, reused or frozen.
  offset = options[:offset] || 0
  max_rows = options[:max_rows]
  limit = max_rows && (max_rows + offset + 1)

  row_count = 0
  @sheet.each_row_streaming do |row|
    break if limit && row_count == limit

    block.call(cells_for_row_element(row, options)) if block && row_count >= offset
    row_count += 1
  end
end
50
+
51
# Returns the values of row +row_number+ as an Array covering
# first_column..last_column; positions without a cell hold nil.
def row(row_number)
  values = []
  first_column.upto(last_column) do |col|
    values << cells[[row_number, col]]&.value
  end
  values
end
56
+
57
# Returns the values of column +col_number+ as an Array covering
# first_row..last_row; positions without a cell hold nil.
def column(col_number)
  values = []
  first_row.upto(last_row) do |row|
    values << cells[[row, col_number]]&.value
  end
  values
end
62
+
63
# Returns the number of the first non-empty row (nil when the sheet has no
# present cells). Taken from #first_last_row_col.
def first_row
  @first_row ||= first_last_row_col.fetch(:first_row)
end
67
+
68
# Returns the number of the last non-empty row (nil when the sheet has no
# present cells). Taken from #first_last_row_col.
def last_row
  @last_row ||= first_last_row_col.fetch(:last_row)
end
71
+
72
# Returns the number of the first non-empty column (nil when the sheet has
# no present cells). Taken from #first_last_row_col.
def first_column
  @first_column ||= first_last_row_col.fetch(:first_column)
end
76
+
77
# Returns the number of the last non-empty column (nil when the sheet has
# no present cells). Taken from #first_last_row_col.
def last_column
  @last_column ||= first_last_row_col.fetch(:last_column)
end
81
+
82
# Returns the format of the cell stored under +key+ as a String, looked up
# through the shared styles by the cell's style id.
# Returns nil when there is no cell for +key+.
def excelx_format(key)
  cell = cells[key]
  return unless cell

  styles.style_format(cell.style).to_s
end
86
+
87
# Returns the sheet's hyperlinks as reported by the underlying sheet
# document for this sheet's relationships. Fetched once, then reused.
def hyperlinks
  return @hyperlinks if @hyperlinks

  @hyperlinks = @sheet.hyperlinks(@rels)
end
90
+
91
# Returns the comments held by this sheet's Comments object.
def comments
  @comments.comments
end
94
+
95
# Returns the dimensions reported by the underlying sheet document.
def dimensions
  @sheet.dimensions
end
98
+
99
+ private
100
+
101
# Converts one row xml element into an Array of Excelx::Cell objects.
#
# row_element - the row xml element; a falsy value yields an empty Array.
# options     - Hash; when :pad_cells is truthy, nil entries are inserted
#               for the column gaps before each cell (see #pad_cells) so
#               that array positions line up with column numbers.
#               Defaults to no padding.
#
# Returns the Array of cells (and nil padding, if requested).
def cells_for_row_element(row_element, options = {})
  return [] unless row_element

  row_cells = []
  previous_column = 0
  @sheet.each_cell(row_element) do |cell|
    row_cells.push(*pad_cells(cell, previous_column)) if options[:pad_cells]
    row_cells.push(cell)
    previous_column = cell.coordinate.column
  end
  row_cells
end
115
+
116
# Returns an Array of nil, one entry per column skipped between
# +last_column+ and the column of +cell+. Empty when no column is skipped.
def pad_cells(cell, last_column)
  gap = cell.coordinate.column - 1 - last_column
  Array.new([gap, 0].max)
end
121
+
122
# Computes (once) the bounding rows and columns of all present cells.
#
# Cells that are nil, or whose #presence is falsy, are ignored.
#
# Returns a Hash with :first_row, :last_row, :first_column and :last_column;
# every value is nil when no cell is present.
def first_last_row_col
  @first_last_row_col ||= begin
    bounds = nil

    cells.each do |(row, col), cell|
      next unless cell&.presence

      if bounds.nil?
        # First present cell seeds all four bounds.
        bounds = { first_row: row, last_row: row, first_column: col, last_column: col }
        next
      end

      bounds[:last_row] = row if row > bounds[:last_row]
      bounds[:first_row] = row if row < bounds[:first_row]
      bounds[:last_column] = col if col > bounds[:last_column]
      bounds[:first_column] = col if col < bounds[:first_column]
    end

    bounds || { first_row: nil, last_row: nil, first_column: nil, last_column: nil }
  end
end
149
+ end
150
+ end
151
+ end
@@ -0,0 +1,257 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'forwardable'
4
+ require 'roo/excelx/extractor'
5
+
6
+ module Roo
7
+ class Excelx
8
+ class SheetDoc < Excelx::Extractor
9
+ extend Forwardable
10
+ delegate [:workbook] => :@shared
11
+
12
# path          - passed to the superclass initializer.
# relationships - relationships object used for hyperlink lookups in
#                 #each_cell.
# shared        - object providing workbook, styles, shared strings and the
#                 base date / timestamp.
# options       - Hash of parsing options read by this class
#                 (:no_hyperlinks, :empty_cell, :expand_merged_ranges).
def initialize(path, relationships, shared, options = {})
  super(path)
  @relationships = relationships
  @shared = shared
  @options = options
end
18
+
19
# Returns the extracted cells of the sheet. Extraction runs on the first
# call only; later calls return the same result regardless of the argument.
def cells(relationships)
  return @cells if @cells

  @cells = extract_cells(relationships)
end
22
+
23
# Returns the hyperlinks of the sheet, computed once and then reused.
#
# Extraction is skipped, and an empty Hash is returned, when the
# :no_hyperlinks option is set (use it if you are sure you will not need
# hyperlinks) or when +relationships+ contains no "hyperlink" type.
def hyperlinks(relationships)
  return @hyperlinks if @hyperlinks

  wanted = !@options[:no_hyperlinks] && relationships.include_type?("hyperlink")
  @hyperlinks = wanted ? extract_hyperlinks(relationships) : {}
end
31
+
32
# Returns the dimensions of the sheet.
# This is only an upper bound of the cells that might be parsed: the
# document may be sparse, so the real cell count can be lower.
def dimensions
  return @dimensions if @dimensions

  @dimensions = extract_dimensions
end
38
+
39
# Hands every 'row' element found by Roo::Utils.each_element for this
# sheet's path to the caller's block.
def each_row_streaming(&block)
  Roo::Utils.each_element(@path, 'row', &block)
end
43
+
44
# Yields an Excelx::Cell for every child element of the given row xml.
# Each cell receives the hyperlink registered for its coordinate, if any.
#
# Returns an empty Array without yielding when +row_xml+ is falsy.
def each_cell(row_xml)
  return [] unless row_xml

  row_xml.children.each do |cell_element|
    coordinate = ::Roo::Utils.extract_coordinate(cell_element["r"])
    link = hyperlinks(@relationships)[coordinate]

    yield cell_from_xml(cell_element, link, coordinate)
  end
end
55
+
56
+ private
57
+
58
# Maps the cell's "t" attribute to an internal value type.
#
# type   - the "t" attribute of the cell element (may be nil).
# format - the cell's format, consulted only when +type+ is none of the
#          known markers.
#
# Returns :shared, :boolean, :string or :inlinestr for the known markers,
# otherwise whatever Excelx::Format.to_type derives from +format+.
def cell_value_type(type, format)
  case type
  when 's' then :shared
  when 'b' then :boolean
  when 'str' then :string
  when 'inlineStr' then :inlinestr
  else Excelx::Format.to_type(format)
  end
end
72
+
73
# Internal: Creates a cell based on an XML cell.
#
# cell_xml   - a Nokogiri::XML::Element. e.g.
#                <c r="A5" s="2">
#                  <v>22606</v>
#                </c>
# hyperlink  - a String for the hyperlink for the cell or nil when no
#              hyperlink is present.
# coordinate - a Roo::Excelx::Coordinate for the coordinate for the cell
#              or nil to extract coordinate from cell_xml.
# empty_cell - an Optional Boolean value; when falsy, nil is returned
#              instead of an empty cell object.
#
# Examples
#
#   cell_from_xml(<Nokogiri::XML::Element>, nil, nil)
#   # => <Excelx::Cell::String>
#
# Returns a type of <Excelx::Cell>, or nil (see empty_cell).
def cell_from_xml(cell_xml, hyperlink, coordinate, empty_cell = true)
  coordinate ||= ::Roo::Utils.extract_coordinate(cell_xml["r"])
  children = cell_xml.children
  return create_empty_cell(coordinate, empty_cell) if children.empty?

  # NOTE: This is error prone, to_i will silently turn a nil into a 0.
  # This works by coincidence because Format[0] is General.
  style = cell_xml["s"].to_i
  formula = nil

  children.each do |child|
    tag = child.name
    if tag == 'f'
      # Remember the formula; it is attached to the cell built from a
      # later <v> or <is> sibling.
      formula = child.content
    elsif tag == 'v'
      format = style_format(style)
      value_type = cell_value_type(cell_xml["t"], format)

      return create_cell_from_value(value_type, child, formula, format, style, hyperlink, coordinate)
    elsif tag == 'is'
      # Inline string: concatenate all <t> fragments; an empty result is
      # skipped so that a later sibling can still provide the value.
      text = child.search('t').map(&:content).join
      next if text.empty?

      return Excelx::Cell.cell_class(:string).new(text, formula, style, hyperlink, coordinate)
    end
  end

  create_empty_cell(coordinate, empty_cell)
end
120
+
121
# Returns an Excelx::Cell::Empty for +coordinate+ when +empty_cell+ is
# truthy, otherwise nil.
def create_empty_cell(coordinate, empty_cell)
  return unless empty_cell

  Excelx::Cell::Empty.new(coordinate)
end
126
+
127
# Builds the Excelx::Cell matching +value_type+ from a <v> element.
#
# value_type - Symbol as returned by #cell_value_type.
# cell       - the <v> element; its content is the raw value.
# formula    - the formula String collected for the cell, or nil.
# format     - the cell's format.
# style      - the cell's style id.
# hyperlink  - the hyperlink for the cell, or nil.
# coordinate - the cell's coordinate.
#
# Returns the cell object created by the selected cell class.
def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
  # NOTE: format.to_s can replace excelx_type as an argument for
  #       Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but
  #       it will break some brittle tests.
  excelx_type = [:numeric_or_formula, format.to_s]

  # NOTE: There are only a few situations where value != cell.content
  #       1. when a sharedString is used. value = sharedString;
  #          cell.content = id of sharedString
  #       2. boolean cells: value = 'TRUE' | 'FALSE'; cell.content = '0' | '1';
  #          But a boolean cell should use TRUE|FALSE as the formatted value
  #          and use a Boolean for its value. Using a Boolean value breaks
  #          Roo::Base#to_csv.
  #       3. formula
  case value_type
  when :shared
    index = cell.content.to_i
    text = if shared_strings.use_html?(index)
             shared_strings.to_html[index]
           else
             shared_strings[index]
           end
    Excelx::Cell.cell_class(:string).new(text, formula, style, hyperlink, coordinate)
  when :boolean, :string
    Excelx::Cell.cell_class(value_type).new(cell.content, formula, style, hyperlink, coordinate)
  when :time, :datetime
    serial = cell.content.to_f
    # NOTE: The serial number decides the concrete type: a value below 1
    #       is a time, a whole number is a date, and anything else (a day
    #       count plus a fraction) is a datetime. With a custom format this
    #       can be interpreted incorrectly.
    #
    #       Should check to see if the format is standard or not. If it's a
    #       standard format, then it's a date, otherwise, it is a datetime.
    #         @styles.standard_style?(style_id)
    #         STANDARD_STYLES.keys.include?(style_id.to_i)
    cell_type = :time if serial < 1.0
    cell_type ||= (serial - serial.floor).abs > 0.000001 ? :datetime : :date

    base_value = cell_type == :date ? base_date : base_timestamp
    Excelx::Cell.cell_class(cell_type).new(serial, formula, excelx_type, style, hyperlink, base_value, coordinate)
  when :date
    Excelx::Cell.cell_class(:date).new(cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
  else
    Excelx::Cell.cell_class(:number).new(cell.content, formula, excelx_type, style, hyperlink, coordinate)
  end
end
175
+
176
# Builds a Hash mapping each coordinate covered by a hyperlink element to
# the link's target.
#
# The target is the 'Target' of the relationship referenced by the
# element's 'id'; when the element has a 'location', it is appended after
# a '#'. Elements whose relationship cannot be found are skipped.
def extract_hyperlinks(relationships)
  nodes = doc.xpath('/worksheet/hyperlinks/hyperlink')
  return {} unless nodes

  links = {}
  nodes.each do |node|
    relationship = relationships[node['id']]
    next unless relationship

    target = relationship['Target']
    location = node['location']
    target += "##{location}" if location

    Roo::Utils.coordinates_in_range(node["ref"].to_s) do |coord|
      links[coord] = target
    end
  end
  links
end
190
+
191
# Copies the top-left cell of every merged range into all other positions
# of that range, modifying +cells+ in place.
#
# Ranges whose top-left position has no cell in +cells+ are skipped.
#
# Returns the Hash of destination key => source key that was applied.
def expand_merged_ranges(cells)
  # Collect every destination first so that copying happens only after the
  # whole list of merged ranges has been read.
  merges = {}
  doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
    src, dst = mergecell_xml["ref"].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
    next unless cells[src]

    (src[0]..dst[0]).each do |row|
      (src[1]..dst[1]).each do |col|
        next if row == src[0] && col == src[1]

        merges[[row, col]] = src
      end
    end
  end

  # Duplicate value into all cells in merged range
  merges.each { |target, source| cells[target] = cells[source] }
end
209
+
210
# Parses every <c> element under /worksheet/sheetData/row into a cell.
#
# A cell without an "r" attribute gets its coordinate from its 1-based
# position (row index, cell index within the row). Cells for which
# #cell_from_xml returns nil are left out. When the :expand_merged_ranges
# option is set, merged ranges are filled in afterwards.
#
# Returns a Hash of coordinate => cell.
def extract_cells(relationships)
  extracted = {}
  keep_empty = @options[:empty_cell]

  doc.xpath('/worksheet/sheetData/row').each_with_index do |row_xml, row_index|
    row_xml.xpath('c').each_with_index do |cell_xml, cell_index|
      ref = cell_xml['r']
      coordinate = if ref.nil?
                     ::Roo::Excelx::Coordinate.new(row_index + 1, cell_index + 1)
                   else
                     ::Roo::Utils.extract_coordinate(ref)
                   end

      link = hyperlinks(relationships)[coordinate]
      cell = cell_from_xml(cell_xml, link, coordinate, keep_empty)
      extracted[coordinate] = cell if cell
    end
  end

  expand_merged_ranges(extracted) if @options[:expand_merged_ranges]

  extracted
end
233
+
234
# Reads the sheet's dimension reference.
#
# Returns the "ref" attribute of the first 'dimension' element yielded by
# Roo::Utils.each_element for this sheet's path, or nil when no such
# element is yielded.
def extract_dimensions
  Roo::Utils.each_element(@path, 'dimension') do |dimension|
    # Only the first dimension element is needed; stop right away.
    return dimension["ref"]
  end

  # Without this, the method's value in the "not found" case would be
  # whatever each_element happens to return, and #dimensions would memoise
  # it as if it were a dimension reference.
  nil
end
239
+
240
# Looks up the format for the given style id through the shared styles.
def style_format(style)
  @shared.styles.style_format(style)
end
243
+
244
# Returns the base date provided by the shared object.
def base_date
  @shared.base_date
end
247
+
248
# Returns the base timestamp provided by the shared object.
def base_timestamp
  @shared.base_timestamp
end
251
+
252
# Returns the shared strings provided by the shared object.
def shared_strings
  @shared.shared_strings
end
255
+ end
256
+ end
257
+ end
@@ -0,0 +1,64 @@
1
+ require 'roo/font'
2
+ require 'roo/excelx/extractor'
3
+
4
+ module Roo
5
+ class Excelx
6
+ class Styles < Excelx::Extractor
7
# Converts an internal excelx style attribute to a format.
#
# The style index selects a numFmtId; a format declared in the document
# for that id wins, otherwise the standard format table is consulted.
def style_format(style)
  fmt_id = num_fmt_ids[style.to_i]
  custom = num_fmts[fmt_id]
  return custom if custom

  Excelx::Format::STANDARD_FORMATS[fmt_id.to_i]
end
12
+
13
# Returns the font used by each cellXfs entry, extracted once and reused.
def definitions
  return @definitions if @definitions

  @definitions = extract_definitions
end
16
+
17
+ private
18
+
19
# Returns the numFmtId values of the cellXfs entries, extracted once and
# reused.
def num_fmt_ids
  return @num_fmt_ids if @num_fmt_ids

  @num_fmt_ids = extract_num_fmt_ids
end
22
+
23
# Returns the numFmtId => formatCode mapping declared in the document,
# extracted once and reused.
def num_fmts
  return @num_fmts if @num_fmts

  @num_fmts = extract_num_fmts
end
26
+
27
# Returns the fonts declared in the document, extracted once and reused.
def fonts
  return @fonts if @fonts

  @fonts = extract_fonts
end
30
+
31
# Resolves, for every child of every cellXfs element, the font referenced
# by its 'fontId' attribute. A missing attribute resolves to font 0
# (nil.to_i).
#
# Returns an Array of fonts in document order.
def extract_definitions
  resolved = []
  doc.xpath('//cellXfs').each do |xfs|
    xfs.children.each do |xf|
      resolved << fonts[xf['fontId'].to_i]
    end
  end
  resolved
end
38
+
39
# Builds a Font for every //fonts/font element. The bold, italic and
# underline flags are set from the presence of a <b>, <i> or <u> child.
#
# Returns an Array of Font objects in document order.
def extract_fonts
  doc.xpath('//fonts/font').map do |font_el|
    font = Font.new
    font.bold = !font_el.xpath('./b').empty?
    font.italic = !font_el.xpath('./i').empty?
    font.underline = !font_el.xpath('./u').empty?
    font
  end
end
48
+
49
# Collects the 'numFmtId' attribute of every child of every cellXfs
# element, in document order.
#
# NOTE(review): children without a numFmtId are dropped, so every entry
# after such a child moves down one position. Confirm this matches the
# style indices that #style_format looks up in this list.
def extract_num_fmt_ids
  ids = []
  doc.xpath('//cellXfs').each do |xfs|
    xfs.children.each do |xf|
      fmt_id = xf['numFmtId']
      ids << fmt_id unless fmt_id.nil?
    end
  end
  ids
end
56
+
57
# Builds a Hash of 'numFmtId' => 'formatCode' from every numFmt element.
# When an id occurs more than once, the last element wins.
def extract_num_fmts
  pairs = doc.xpath('//numFmt').map do |num_fmt|
    [num_fmt['numFmtId'], num_fmt['formatCode']]
  end
  pairs.to_h
end
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,64 @@
1
+ require 'roo/excelx/extractor'
2
+
3
+ module Roo
4
+ class Excelx
5
+ class Workbook < Excelx::Extractor
6
# A named cell (defined name) of the workbook.
class Label
  attr_reader :sheet, :row, :col, :name

  # name  - the defined name.
  # sheet - the name of the sheet the label points into.
  # row   - the row number; converted with to_i.
  # col   - the column letters; converted to a number with
  #         Roo::Utils.letter_to_number.
  def initialize(name, sheet, row, col)
    @name = name
    @sheet = sheet
    @row = row.to_i
    @col = ::Roo::Utils.letter_to_number(col)
  end

  # Returns the [row, col] pair of the labelled cell.
  def key
    [row, col]
  end
end
20
+
21
# path - passed on unchanged to the superclass initializer.
#
# Raises ArgumentError when doc_exists? reports that the workbook file is
# missing.
def initialize(path)
  super
  return if doc_exists?

  raise ArgumentError, 'missing required workbook file'
end
25
+
26
# Returns every <sheet> element of the workbook document.
def sheets
  doc.xpath('//sheet')
end
29
+
30
# Returns the workbook's defined names (aka labels) as a Hash of
# name => Label.
#
# Only definitions whose text has the form "Sheet1!$C$5" are used; a
# definition without the "!$" separator is skipped.
def defined_names
  labels = {}
  doc.xpath('//definedName').each do |node|
    # "Sheet1!$C$5" => sheet "Sheet1", reference "C$5"
    sheet, reference = node.text.split('!$', 2)
    next unless reference

    col, row = reference.split('$')
    label_name = node['name']
    labels[label_name] = Label.new(label_name, sheet, row, col)
  end
  labels
end
41
+
42
# Returns #base_date as an Integer number of seconds since the Unix epoch,
# computed once and reused.
def base_timestamp
  return @base_timestamp if @base_timestamp

  midnight = base_date.to_datetime
  @base_timestamp = midnight.to_time.to_i
end
45
+
46
# Returns the Date that serial day numbers in this workbook count from,
# computed once and reused.
#
# Defaults to 1900 (minus one day due to an excel quirk, i.e. 1899-12-30)
# but uses 1904-01-01 if date1904 is set in the Workbook's workbookPr:
# http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
def base_date
  @base_date ||=
    if doc.css('workbookPr[date1904]').any? { |node| node['date1904'] =~ /true|1/i }
      Date.new(1904, 1, 1)
    else
      Date.new(1899, 12, 30)
    end
end
62
+ end
63
+ end
64
+ end