roo 1.13.2 → 2.10.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (236) hide show
  1. checksums.yaml +5 -5
  2. data/.codeclimate.yml +17 -0
  3. data/.github/issue_template.md +16 -0
  4. data/.github/pull_request_template.md +14 -0
  5. data/.github/workflows/pull-request.yml +15 -0
  6. data/.github/workflows/ruby.yml +34 -0
  7. data/.gitignore +11 -0
  8. data/.rubocop.yml +186 -0
  9. data/.simplecov +4 -0
  10. data/CHANGELOG.md +702 -0
  11. data/Gemfile +18 -12
  12. data/Guardfile +23 -0
  13. data/LICENSE +5 -1
  14. data/README.md +328 -0
  15. data/Rakefile +23 -23
  16. data/examples/roo_soap_client.rb +28 -31
  17. data/examples/roo_soap_server.rb +4 -6
  18. data/examples/write_me.rb +9 -10
  19. data/lib/roo/base.rb +317 -504
  20. data/lib/roo/constants.rb +7 -0
  21. data/lib/roo/csv.rb +141 -113
  22. data/lib/roo/errors.rb +11 -0
  23. data/lib/roo/excelx/cell/base.rb +108 -0
  24. data/lib/roo/excelx/cell/boolean.rb +30 -0
  25. data/lib/roo/excelx/cell/date.rb +28 -0
  26. data/lib/roo/excelx/cell/datetime.rb +107 -0
  27. data/lib/roo/excelx/cell/empty.rb +20 -0
  28. data/lib/roo/excelx/cell/number.rb +99 -0
  29. data/lib/roo/excelx/cell/string.rb +19 -0
  30. data/lib/roo/excelx/cell/time.rb +44 -0
  31. data/lib/roo/excelx/cell.rb +110 -0
  32. data/lib/roo/excelx/comments.rb +55 -0
  33. data/lib/roo/excelx/coordinate.rb +19 -0
  34. data/lib/roo/excelx/extractor.rb +39 -0
  35. data/lib/roo/excelx/format.rb +71 -0
  36. data/lib/roo/excelx/images.rb +26 -0
  37. data/lib/roo/excelx/relationships.rb +33 -0
  38. data/lib/roo/excelx/shared.rb +39 -0
  39. data/lib/roo/excelx/shared_strings.rb +151 -0
  40. data/lib/roo/excelx/sheet.rb +151 -0
  41. data/lib/roo/excelx/sheet_doc.rb +257 -0
  42. data/lib/roo/excelx/styles.rb +64 -0
  43. data/lib/roo/excelx/workbook.rb +64 -0
  44. data/lib/roo/excelx.rb +407 -601
  45. data/lib/roo/font.rb +17 -0
  46. data/lib/roo/formatters/base.rb +15 -0
  47. data/lib/roo/formatters/csv.rb +84 -0
  48. data/lib/roo/formatters/matrix.rb +23 -0
  49. data/lib/roo/formatters/xml.rb +31 -0
  50. data/lib/roo/formatters/yaml.rb +40 -0
  51. data/lib/roo/helpers/default_attr_reader.rb +20 -0
  52. data/lib/roo/helpers/weak_instance_cache.rb +41 -0
  53. data/lib/roo/libre_office.rb +4 -0
  54. data/lib/roo/link.rb +34 -0
  55. data/lib/roo/open_office.rb +631 -0
  56. data/lib/roo/spreadsheet.rb +28 -23
  57. data/lib/roo/tempdir.rb +24 -0
  58. data/lib/roo/utils.rb +128 -0
  59. data/lib/roo/version.rb +3 -0
  60. data/lib/roo.rb +26 -24
  61. data/roo.gemspec +29 -203
  62. data/spec/helpers.rb +5 -0
  63. data/spec/lib/roo/base_spec.rb +291 -3
  64. data/spec/lib/roo/csv_spec.rb +38 -11
  65. data/spec/lib/roo/excelx/cell/time_spec.rb +15 -0
  66. data/spec/lib/roo/excelx/format_spec.rb +7 -6
  67. data/spec/lib/roo/excelx/relationships_spec.rb +43 -0
  68. data/spec/lib/roo/excelx/sheet_doc_spec.rb +11 -0
  69. data/spec/lib/roo/excelx_spec.rb +672 -11
  70. data/spec/lib/roo/libreoffice_spec.rb +16 -6
  71. data/spec/lib/roo/openoffice_spec.rb +30 -8
  72. data/spec/lib/roo/spreadsheet_spec.rb +60 -12
  73. data/spec/lib/roo/strict_spec.rb +43 -0
  74. data/spec/lib/roo/utils_spec.rb +119 -0
  75. data/spec/lib/roo/weak_instance_cache_spec.rb +92 -0
  76. data/spec/lib/roo_spec.rb +0 -0
  77. data/spec/spec_helper.rb +7 -6
  78. data/test/all_ss.rb +12 -11
  79. data/test/excelx/cell/test_attr_reader_default.rb +72 -0
  80. data/test/excelx/cell/test_base.rb +68 -0
  81. data/test/excelx/cell/test_boolean.rb +36 -0
  82. data/test/excelx/cell/test_date.rb +38 -0
  83. data/test/excelx/cell/test_datetime.rb +45 -0
  84. data/test/excelx/cell/test_empty.rb +18 -0
  85. data/test/excelx/cell/test_number.rb +90 -0
  86. data/test/excelx/cell/test_string.rb +48 -0
  87. data/test/excelx/cell/test_time.rb +30 -0
  88. data/test/excelx/test_coordinate.rb +51 -0
  89. data/test/formatters/test_csv.rb +136 -0
  90. data/test/formatters/test_matrix.rb +76 -0
  91. data/test/formatters/test_xml.rb +78 -0
  92. data/test/formatters/test_yaml.rb +20 -0
  93. data/test/helpers/test_accessing_files.rb +81 -0
  94. data/test/helpers/test_comments.rb +43 -0
  95. data/test/helpers/test_formulas.rb +9 -0
  96. data/test/helpers/test_labels.rb +103 -0
  97. data/test/helpers/test_sheets.rb +55 -0
  98. data/test/helpers/test_styles.rb +62 -0
  99. data/test/roo/test_base.rb +182 -0
  100. data/test/roo/test_csv.rb +88 -0
  101. data/test/roo/test_excelx.rb +360 -0
  102. data/test/roo/test_libre_office.rb +9 -0
  103. data/test/roo/test_open_office.rb +289 -0
  104. data/test/test_helper.rb +123 -59
  105. data/test/test_roo.rb +392 -2292
  106. metadata +153 -298
  107. data/CHANGELOG +0 -417
  108. data/Gemfile.lock +0 -78
  109. data/README.markdown +0 -126
  110. data/VERSION +0 -1
  111. data/lib/roo/excel.rb +0 -355
  112. data/lib/roo/excel2003xml.rb +0 -300
  113. data/lib/roo/google.rb +0 -292
  114. data/lib/roo/openoffice.rb +0 -496
  115. data/lib/roo/roo_rails_helper.rb +0 -83
  116. data/lib/roo/worksheet.rb +0 -18
  117. data/scripts/txt2html +0 -67
  118. data/spec/lib/roo/excel2003xml_spec.rb +0 -15
  119. data/spec/lib/roo/excel_spec.rb +0 -17
  120. data/spec/lib/roo/google_spec.rb +0 -64
  121. data/test/files/1900_base.xls +0 -0
  122. data/test/files/1900_base.xlsx +0 -0
  123. data/test/files/1904_base.xls +0 -0
  124. data/test/files/1904_base.xlsx +0 -0
  125. data/test/files/Bibelbund.csv +0 -3741
  126. data/test/files/Bibelbund.ods +0 -0
  127. data/test/files/Bibelbund.xls +0 -0
  128. data/test/files/Bibelbund.xlsx +0 -0
  129. data/test/files/Bibelbund.xml +0 -62518
  130. data/test/files/Bibelbund1.ods +0 -0
  131. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  132. data/test/files/bad_excel_date.xls +0 -0
  133. data/test/files/bbu.ods +0 -0
  134. data/test/files/bbu.xls +0 -0
  135. data/test/files/bbu.xlsx +0 -0
  136. data/test/files/bbu.xml +0 -152
  137. data/test/files/bode-v1.ods.zip +0 -0
  138. data/test/files/bode-v1.xls.zip +0 -0
  139. data/test/files/boolean.csv +0 -2
  140. data/test/files/boolean.ods +0 -0
  141. data/test/files/boolean.xls +0 -0
  142. data/test/files/boolean.xlsx +0 -0
  143. data/test/files/boolean.xml +0 -112
  144. data/test/files/borders.ods +0 -0
  145. data/test/files/borders.xls +0 -0
  146. data/test/files/borders.xlsx +0 -0
  147. data/test/files/borders.xml +0 -144
  148. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  149. data/test/files/bug-row-column-fixnum-float.xls +0 -0
  150. data/test/files/bug-row-column-fixnum-float.xml +0 -127
  151. data/test/files/comments.ods +0 -0
  152. data/test/files/comments.xls +0 -0
  153. data/test/files/comments.xlsx +0 -0
  154. data/test/files/csvtypes.csv +0 -1
  155. data/test/files/datetime.ods +0 -0
  156. data/test/files/datetime.xls +0 -0
  157. data/test/files/datetime.xlsx +0 -0
  158. data/test/files/datetime.xml +0 -142
  159. data/test/files/datetime_floatconv.xls +0 -0
  160. data/test/files/datetime_floatconv.xml +0 -148
  161. data/test/files/dreimalvier.ods +0 -0
  162. data/test/files/emptysheets.ods +0 -0
  163. data/test/files/emptysheets.xls +0 -0
  164. data/test/files/emptysheets.xlsx +0 -0
  165. data/test/files/emptysheets.xml +0 -105
  166. data/test/files/excel2003.xml +0 -21140
  167. data/test/files/false_encoding.xls +0 -0
  168. data/test/files/false_encoding.xml +0 -132
  169. data/test/files/file_item_error.xlsx +0 -0
  170. data/test/files/formula.ods +0 -0
  171. data/test/files/formula.xls +0 -0
  172. data/test/files/formula.xlsx +0 -0
  173. data/test/files/formula.xml +0 -134
  174. data/test/files/formula_parse_error.xls +0 -0
  175. data/test/files/formula_parse_error.xml +0 -1833
  176. data/test/files/formula_string_error.xlsx +0 -0
  177. data/test/files/html-escape.ods +0 -0
  178. data/test/files/link.xls +0 -0
  179. data/test/files/link.xlsx +0 -0
  180. data/test/files/matrix.ods +0 -0
  181. data/test/files/matrix.xls +0 -0
  182. data/test/files/named_cells.ods +0 -0
  183. data/test/files/named_cells.xls +0 -0
  184. data/test/files/named_cells.xlsx +0 -0
  185. data/test/files/no_spreadsheet_file.txt +0 -1
  186. data/test/files/numbers1.csv +0 -18
  187. data/test/files/numbers1.ods +0 -0
  188. data/test/files/numbers1.xls +0 -0
  189. data/test/files/numbers1.xlsx +0 -0
  190. data/test/files/numbers1.xml +0 -312
  191. data/test/files/numeric-link.xlsx +0 -0
  192. data/test/files/only_one_sheet.ods +0 -0
  193. data/test/files/only_one_sheet.xls +0 -0
  194. data/test/files/only_one_sheet.xlsx +0 -0
  195. data/test/files/only_one_sheet.xml +0 -67
  196. data/test/files/paragraph.ods +0 -0
  197. data/test/files/paragraph.xls +0 -0
  198. data/test/files/paragraph.xlsx +0 -0
  199. data/test/files/paragraph.xml +0 -127
  200. data/test/files/prova.xls +0 -0
  201. data/test/files/ric.ods +0 -0
  202. data/test/files/simple_spreadsheet.ods +0 -0
  203. data/test/files/simple_spreadsheet.xls +0 -0
  204. data/test/files/simple_spreadsheet.xlsx +0 -0
  205. data/test/files/simple_spreadsheet.xml +0 -225
  206. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  207. data/test/files/simple_spreadsheet_from_italo.xls +0 -0
  208. data/test/files/simple_spreadsheet_from_italo.xml +0 -242
  209. data/test/files/so_datetime.csv +0 -7
  210. data/test/files/style.ods +0 -0
  211. data/test/files/style.xls +0 -0
  212. data/test/files/style.xlsx +0 -0
  213. data/test/files/style.xml +0 -154
  214. data/test/files/time-test.csv +0 -2
  215. data/test/files/time-test.ods +0 -0
  216. data/test/files/time-test.xls +0 -0
  217. data/test/files/time-test.xlsx +0 -0
  218. data/test/files/time-test.xml +0 -131
  219. data/test/files/type_excel.ods +0 -0
  220. data/test/files/type_excel.xlsx +0 -0
  221. data/test/files/type_excelx.ods +0 -0
  222. data/test/files/type_excelx.xls +0 -0
  223. data/test/files/type_openoffice.xls +0 -0
  224. data/test/files/type_openoffice.xlsx +0 -0
  225. data/test/files/whitespace.ods +0 -0
  226. data/test/files/whitespace.xls +0 -0
  227. data/test/files/whitespace.xlsx +0 -0
  228. data/test/files/whitespace.xml +0 -184
  229. data/test/rm_sub_test.rb +0 -12
  230. data/test/rm_test.rb +0 -7
  231. data/test/test_generic_spreadsheet.rb +0 -259
  232. data/website/index.html +0 -385
  233. data/website/index.txt +0 -423
  234. data/website/javascripts/rounded_corners_lite.inc.js +0 -285
  235. data/website/stylesheets/screen.css +0 -130
  236. data/website/template.rhtml +0 -48
@@ -0,0 +1,151 @@
1
+ require 'forwardable'
2
+ module Roo
3
+ class Excelx
4
+ class Sheet
5
+ extend Forwardable
6
+
7
+ delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels] => :@shared
8
+
9
+ attr_reader :images
10
+
11
+ def initialize(name, shared, sheet_index, options = {})
12
+ @name = name
13
+ @shared = shared
14
+ @sheet_index = sheet_index
15
+ @images = Images.new(image_rels[sheet_index]).list
16
+ @rels = Relationships.new(rels_files[sheet_index])
17
+ @comments = Comments.new(comments_files[sheet_index])
18
+ @sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
19
+ end
20
+
21
+ def cells
22
+ @cells ||= @sheet.cells(@rels)
23
+ end
24
+
25
+ def present_cells
26
+ @present_cells ||= begin
27
+ warn %{
28
+ [DEPRECATION] present_cells is deprecated. Alternate:
29
+ with activesupport => cells[key].presence
30
+ without activesupport => cells[key]&.presence
31
+ }
32
+ cells.select { |_, cell| cell&.presence }
33
+ end
34
+ end
35
+
36
+ # Yield each row as array of Excelx::Cell objects
37
+ # accepts options max_rows (int) (offset by 1 for header),
38
+ # pad_cells (boolean) and offset (int)
39
+ def each_row(options = {}, &block)
40
+ row_count = 0
41
+ options[:offset] ||= 0
42
+ @sheet.each_row_streaming do |row|
43
+ break if options[:max_rows] && row_count == options[:max_rows] + options[:offset] + 1
44
+ if block_given? && !(options[:offset] && row_count < options[:offset])
45
+ block.call(cells_for_row_element(row, options))
46
+ end
47
+ row_count += 1
48
+ end
49
+ end
50
+
51
+ def row(row_number)
52
+ first_column.upto(last_column).map do |col|
53
+ cells[[row_number, col]]&.value
54
+ end
55
+ end
56
+
57
+ def column(col_number)
58
+ first_row.upto(last_row).map do |row|
59
+ cells[[row, col_number]]&.value
60
+ end
61
+ end
62
+
63
+ # returns the number of the first non-empty row
64
+ def first_row
65
+ @first_row ||= first_last_row_col[:first_row]
66
+ end
67
+
68
+ def last_row
69
+ @last_row ||= first_last_row_col[:last_row]
70
+ end
71
+
72
+ # returns the number of the first non-empty column
73
+ def first_column
74
+ @first_column ||= first_last_row_col[:first_column]
75
+ end
76
+
77
+ # returns the number of the last non-empty column
78
+ def last_column
79
+ @last_column ||= first_last_row_col[:last_column]
80
+ end
81
+
82
+ def excelx_format(key)
83
+ cell = cells[key]
84
+ styles.style_format(cell.style).to_s if cell
85
+ end
86
+
87
+ def hyperlinks
88
+ @hyperlinks ||= @sheet.hyperlinks(@rels)
89
+ end
90
+
91
+ def comments
92
+ @comments.comments
93
+ end
94
+
95
+ def dimensions
96
+ @sheet.dimensions
97
+ end
98
+
99
+ private
100
+
101
+ # Take an xml row and return an array of Excelx::Cell objects
102
+ # optionally pad array to header width(assumed 1st row).
103
+ # takes option pad_cells (boolean) defaults false
104
+ def cells_for_row_element(row_element, options = {})
105
+ return [] unless row_element
106
+ cell_col = 0
107
+ cells = []
108
+ @sheet.each_cell(row_element) do |cell|
109
+ cells.concat(pad_cells(cell, cell_col)) if options[:pad_cells]
110
+ cells << cell
111
+ cell_col = cell.coordinate.column
112
+ end
113
+ cells
114
+ end
115
+
116
+ def pad_cells(cell, last_column)
117
+ pad = []
118
+ (cell.coordinate.column - 1 - last_column).times { pad << nil }
119
+ pad
120
+ end
121
+
122
+ def first_last_row_col
123
+ @first_last_row_col ||= begin
124
+ first_row = last_row = first_col = last_col = nil
125
+
126
+ cells.each do |(row, col), cell|
127
+ next unless cell&.presence
128
+ first_row ||= row
129
+ last_row ||= row
130
+ first_col ||= col
131
+ last_col ||= col
132
+
133
+ if row > last_row
134
+ last_row = row
135
+ elsif row < first_row
136
+ first_row = row
137
+ end
138
+
139
+ if col > last_col
140
+ last_col = col
141
+ elsif col < first_col
142
+ first_col = col
143
+ end
144
+ end
145
+
146
+ {first_row: first_row, last_row: last_row, first_column: first_col, last_column: last_col}
147
+ end
148
+ end
149
+ end
150
+ end
151
+ end
@@ -0,0 +1,257 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'forwardable'
4
+ require 'roo/excelx/extractor'
5
+
6
+ module Roo
7
+ class Excelx
8
+ class SheetDoc < Excelx::Extractor
9
+ extend Forwardable
10
+ delegate [:workbook] => :@shared
11
+
12
+ def initialize(path, relationships, shared, options = {})
13
+ super(path)
14
+ @shared = shared
15
+ @options = options
16
+ @relationships = relationships
17
+ end
18
+
19
+ def cells(relationships)
20
+ @cells ||= extract_cells(relationships)
21
+ end
22
+
23
+ def hyperlinks(relationships)
24
+ # If you're sure you're not going to need the hyperlinks, you can discard them
25
+ @hyperlinks ||= if @options[:no_hyperlinks] || !relationships.include_type?("hyperlink")
26
+ {}
27
+ else
28
+ extract_hyperlinks(relationships)
29
+ end
30
+ end
31
+
32
+ # Get the dimensions for the sheet.
33
+ # This is the upper bound of cells that might
34
+ # be parsed. (The document may be sparse, so the cell count is only an upper bound.)
35
+ def dimensions
36
+ @dimensions ||= extract_dimensions
37
+ end
38
+
39
+ # Yield each row xml element to caller
40
+ def each_row_streaming(&block)
41
+ Roo::Utils.each_element(@path, 'row', &block)
42
+ end
43
+
44
+ # Yield each cell as Excelx::Cell to caller for given
45
+ # row xml
46
+ def each_cell(row_xml)
47
+ return [] unless row_xml
48
+ row_xml.children.each do |cell_element|
49
+ coordinate = ::Roo::Utils.extract_coordinate(cell_element["r"])
50
+ hyperlinks = hyperlinks(@relationships)[coordinate]
51
+
52
+ yield cell_from_xml(cell_element, hyperlinks, coordinate)
53
+ end
54
+ end
55
+
56
+ private
57
+
58
+ def cell_value_type(type, format)
59
+ case type
60
+ when 's'
61
+ :shared
62
+ when 'b'
63
+ :boolean
64
+ when 'str'
65
+ :string
66
+ when 'inlineStr'
67
+ :inlinestr
68
+ else
69
+ Excelx::Format.to_type(format)
70
+ end
71
+ end
72
+
73
+ # Internal: Creates a cell based on an XML cell.
74
+ #
75
+ # cell_xml - a Nokogiri::XML::Element. e.g.
76
+ # <c r="A5" s="2">
77
+ # <v>22606</v>
78
+ # </c>
79
+ # hyperlink - a String for the hyperlink for the cell or nil when no
80
+ # hyperlink is present.
81
+ # coordinate - a Roo::Excelx::Coordinate for the coordinate for the cell
82
+ # or nil to extract coordinate from cell_xml.
83
+ # empty_cell - an Optional Boolean value.
84
+ #
85
+ # Examples
86
+ #
87
+ # cell_from_xml(<Nokogiri::XML::Element>, nil, nil)
88
+ # # => <Excelx::Cell::String>
89
+ #
90
+ # Returns a type of <Excelx::Cell>.
91
+ def cell_from_xml(cell_xml, hyperlink, coordinate, empty_cell=true)
92
+ coordinate ||= ::Roo::Utils.extract_coordinate(cell_xml["r"])
93
+ cell_xml_children = cell_xml.children
94
+ return create_empty_cell(coordinate, empty_cell) if cell_xml_children.empty?
95
+
96
+ # NOTE: This is error prone, to_i will silently turn a nil into a 0.
97
+ # This works by coincidence because Format[0] is General.
98
+ style = cell_xml["s"].to_i
99
+ formula = nil
100
+
101
+ cell_xml_children.each do |cell|
102
+ case cell.name
103
+ when 'is'
104
+ content = cell.search('t').map(&:content).join
105
+ unless content.empty?
106
+ return Excelx::Cell.cell_class(:string).new(content, formula, style, hyperlink, coordinate)
107
+ end
108
+ when 'f'
109
+ formula = cell.content
110
+ when 'v'
111
+ format = style_format(style)
112
+ value_type = cell_value_type(cell_xml["t"], format)
113
+
114
+ return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
115
+ end
116
+ end
117
+
118
+ create_empty_cell(coordinate, empty_cell)
119
+ end
120
+
121
+ def create_empty_cell(coordinate, empty_cell)
122
+ if empty_cell
123
+ Excelx::Cell::Empty.new(coordinate)
124
+ end
125
+ end
126
+
127
+ def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
128
+ # NOTE: format.to_s can replace excelx_type as an argument for
129
+ # Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but
130
+ # it will break some brittle tests.
131
+ excelx_type = [:numeric_or_formula, format.to_s]
132
+
133
+ # NOTE: There are only a few situations where value != cell.content
134
+ # 1. when a sharedString is used. value = sharedString;
135
+ # cell.content = id of sharedString
136
+ # 2. boolean cells: value = 'TRUE' | 'FALSE'; cell.content = '0' | '1';
137
+ # But a boolean cell should use TRUE|FALSE as the formatted value
138
+ # and use a Boolean for its value. Using a Boolean value breaks
139
+ # Roo::Base#to_csv.
140
+ # 3. formula
141
+ case value_type
142
+ when :shared
143
+ cell_content = cell.content.to_i
144
+ value = shared_strings.use_html?(cell_content) ? shared_strings.to_html[cell_content] : shared_strings[cell_content]
145
+ Excelx::Cell.cell_class(:string).new(value, formula, style, hyperlink, coordinate)
146
+ when :boolean, :string
147
+ value = cell.content
148
+ Excelx::Cell.cell_class(value_type).new(value, formula, style, hyperlink, coordinate)
149
+ when :time, :datetime
150
+ cell_content = cell.content.to_f
151
+ # NOTE: A date will be a whole number. A time will be < 1. And
152
+ # in general, a datetime will have decimals. But if the cell is
153
+ # using a custom format, it's possible to be interpreted incorrectly.
154
+ # cell_content.to_i == cell_content && standard_style?=> :date
155
+ #
156
+ # Should check to see if the format is standard or not. If it's a
157
+ # standard format, then it's a date, otherwise, it is a datetime.
158
+ # @styles.standard_style?(style_id)
159
+ # STANDARD_STYLES.keys.include?(style_id.to_i)
160
+ cell_type = if cell_content < 1.0
161
+ :time
162
+ elsif (cell_content - cell_content.floor).abs > 0.000001
163
+ :datetime
164
+ else
165
+ :date
166
+ end
167
+ base_value = cell_type == :date ? base_date : base_timestamp
168
+ Excelx::Cell.cell_class(cell_type).new(cell_content, formula, excelx_type, style, hyperlink, base_value, coordinate)
169
+ when :date
170
+ Excelx::Cell.cell_class(:date).new(cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
171
+ else
172
+ Excelx::Cell.cell_class(:number).new(cell.content, formula, excelx_type, style, hyperlink, coordinate)
173
+ end
174
+ end
175
+
176
+ def extract_hyperlinks(relationships)
177
+ return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink'))
178
+
179
+ hyperlinks.each_with_object({}) do |hyperlink, hash|
180
+ if relationship = relationships[hyperlink['id']]
181
+ target_link = relationship['Target']
182
+ target_link += "##{hyperlink['location']}" if hyperlink['location']
183
+
184
+ Roo::Utils.coordinates_in_range(hyperlink["ref"].to_s) do |coord|
185
+ hash[coord] = target_link
186
+ end
187
+ end
188
+ end
189
+ end
190
+
191
+ def expand_merged_ranges(cells)
192
+ # Extract merged ranges from xml
193
+ merges = {}
194
+ doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
195
+ src, dst = mergecell_xml["ref"].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
196
+ next unless cells[src]
197
+ for row in src[0]..dst[0] do
198
+ for col in src[1]..dst[1] do
199
+ next if row == src[0] && col == src[1]
200
+ merges[[row, col]] = src
201
+ end
202
+ end
203
+ end
204
+ # Duplicate value into all cells in merged range
205
+ merges.each do |dst, src|
206
+ cells[dst] = cells[src]
207
+ end
208
+ end
209
+
210
+ def extract_cells(relationships)
211
+ extracted_cells = {}
212
+ empty_cell = @options[:empty_cell]
213
+
214
+ doc.xpath('/worksheet/sheetData/row').each.with_index(1) do |row_xml, ycoord|
215
+ row_xml.xpath('c').each.with_index(1) do |cell_xml, xcoord|
216
+ r = cell_xml['r']
217
+ coordinate =
218
+ if r.nil?
219
+ ::Roo::Excelx::Coordinate.new(ycoord, xcoord)
220
+ else
221
+ ::Roo::Utils.extract_coordinate(r)
222
+ end
223
+
224
+ cell = cell_from_xml(cell_xml, hyperlinks(relationships)[coordinate], coordinate, empty_cell)
225
+ extracted_cells[coordinate] = cell if cell
226
+ end
227
+ end
228
+
229
+ expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges]
230
+
231
+ extracted_cells
232
+ end
233
+
234
+ def extract_dimensions
235
+ Roo::Utils.each_element(@path, 'dimension') do |dimension|
236
+ return dimension["ref"]
237
+ end
238
+ end
239
+
240
+ def style_format(style)
241
+ @shared.styles.style_format(style)
242
+ end
243
+
244
+ def base_date
245
+ @shared.base_date
246
+ end
247
+
248
+ def base_timestamp
249
+ @shared.base_timestamp
250
+ end
251
+
252
+ def shared_strings
253
+ @shared.shared_strings
254
+ end
255
+ end
256
+ end
257
+ end
@@ -0,0 +1,64 @@
1
+ require 'roo/font'
2
+ require 'roo/excelx/extractor'
3
+
4
+ module Roo
5
+ class Excelx
6
+ class Styles < Excelx::Extractor
7
+ # convert internal excelx attribute to a format
8
+ def style_format(style)
9
+ id = num_fmt_ids[style.to_i]
10
+ num_fmts[id] || Excelx::Format::STANDARD_FORMATS[id.to_i]
11
+ end
12
+
13
+ def definitions
14
+ @definitions ||= extract_definitions
15
+ end
16
+
17
+ private
18
+
19
+ def num_fmt_ids
20
+ @num_fmt_ids ||= extract_num_fmt_ids
21
+ end
22
+
23
+ def num_fmts
24
+ @num_fmts ||= extract_num_fmts
25
+ end
26
+
27
+ def fonts
28
+ @fonts ||= extract_fonts
29
+ end
30
+
31
+ def extract_definitions
32
+ doc.xpath('//cellXfs').flat_map do |xfs|
33
+ xfs.children.map do |xf|
34
+ fonts[xf['fontId'].to_i]
35
+ end
36
+ end
37
+ end
38
+
39
+ def extract_fonts
40
+ doc.xpath('//fonts/font').map do |font_el|
41
+ Font.new.tap do |font|
42
+ font.bold = !font_el.xpath('./b').empty?
43
+ font.italic = !font_el.xpath('./i').empty?
44
+ font.underline = !font_el.xpath('./u').empty?
45
+ end
46
+ end
47
+ end
48
+
49
+ def extract_num_fmt_ids
50
+ doc.xpath('//cellXfs').flat_map do |xfs|
51
+ xfs.children.map do |xf|
52
+ xf['numFmtId']
53
+ end
54
+ end.compact
55
+ end
56
+
57
+ def extract_num_fmts
58
+ doc.xpath('//numFmt').each_with_object({}) do |num_fmt, hash|
59
+ hash[num_fmt['numFmtId']] = num_fmt['formatCode']
60
+ end
61
+ end
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,64 @@
1
+ require 'roo/excelx/extractor'
2
+
3
+ module Roo
4
+ class Excelx
5
+ class Workbook < Excelx::Extractor
6
+ class Label
7
+ attr_reader :sheet, :row, :col, :name
8
+
9
+ def initialize(name, sheet, row, col)
10
+ @name = name
11
+ @sheet = sheet
12
+ @row = row.to_i
13
+ @col = ::Roo::Utils.letter_to_number(col)
14
+ end
15
+
16
+ def key
17
+ [@row, @col]
18
+ end
19
+ end
20
+
21
+ def initialize(path)
22
+ super
23
+ fail ArgumentError, 'missing required workbook file' unless doc_exists?
24
+ end
25
+
26
+ def sheets
27
+ doc.xpath('//sheet')
28
+ end
29
+
30
+ # aka labels
31
+ def defined_names
32
+ doc.xpath('//definedName').each_with_object({}) do |defined_name, hash|
33
+ # "Sheet1!$C$5"
34
+ sheet, coordinates = defined_name.text.split('!$', 2)
35
+ next unless coordinates
36
+ col, row = coordinates.split('$')
37
+ name = defined_name['name']
38
+ hash[name] = Label.new(name, sheet, row, col)
39
+ end
40
+ end
41
+
42
+ def base_timestamp
43
+ @base_timestamp ||= base_date.to_datetime.to_time.to_i
44
+ end
45
+
46
+ def base_date
47
+ @base_date ||=
48
+ begin
49
+ # Default to 1900 (minus one day due to excel quirk) but use 1904 if
50
+ # it's set in the Workbook's workbookPr
51
+ # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
52
+ result = Date.new(1899, 12, 30) # default
53
+ doc.css('workbookPr[date1904]').each do |workbookPr|
54
+ if workbookPr['date1904'] =~ /true|1/i
55
+ result = Date.new(1904, 01, 01)
56
+ break
57
+ end
58
+ end
59
+ result
60
+ end
61
+ end
62
+ end
63
+ end
64
+ end