roo 1.13.2 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/.codeclimate.yml +17 -0
  3. data/.github/ISSUE_TEMPLATE +10 -0
  4. data/.gitignore +11 -0
  5. data/.simplecov +4 -0
  6. data/.travis.yml +17 -0
  7. data/CHANGELOG.md +626 -0
  8. data/Gemfile +17 -12
  9. data/Gemfile_ruby2 +30 -0
  10. data/Guardfile +23 -0
  11. data/LICENSE +3 -1
  12. data/README.md +285 -0
  13. data/Rakefile +23 -23
  14. data/examples/roo_soap_client.rb +28 -31
  15. data/examples/roo_soap_server.rb +4 -6
  16. data/examples/write_me.rb +9 -10
  17. data/lib/roo/base.rb +298 -495
  18. data/lib/roo/constants.rb +5 -0
  19. data/lib/roo/csv.rb +127 -113
  20. data/lib/roo/errors.rb +11 -0
  21. data/lib/roo/excelx/cell/base.rb +94 -0
  22. data/lib/roo/excelx/cell/boolean.rb +27 -0
  23. data/lib/roo/excelx/cell/date.rb +28 -0
  24. data/lib/roo/excelx/cell/datetime.rb +111 -0
  25. data/lib/roo/excelx/cell/empty.rb +19 -0
  26. data/lib/roo/excelx/cell/number.rb +87 -0
  27. data/lib/roo/excelx/cell/string.rb +19 -0
  28. data/lib/roo/excelx/cell/time.rb +43 -0
  29. data/lib/roo/excelx/cell.rb +106 -0
  30. data/lib/roo/excelx/comments.rb +55 -0
  31. data/lib/roo/excelx/coordinate.rb +12 -0
  32. data/lib/roo/excelx/extractor.rb +21 -0
  33. data/lib/roo/excelx/format.rb +64 -0
  34. data/lib/roo/excelx/relationships.rb +25 -0
  35. data/lib/roo/excelx/shared.rb +32 -0
  36. data/lib/roo/excelx/shared_strings.rb +157 -0
  37. data/lib/roo/excelx/sheet.rb +112 -0
  38. data/lib/roo/excelx/sheet_doc.rb +211 -0
  39. data/lib/roo/excelx/styles.rb +64 -0
  40. data/lib/roo/excelx/workbook.rb +59 -0
  41. data/lib/roo/excelx.rb +376 -602
  42. data/lib/roo/font.rb +17 -0
  43. data/lib/roo/formatters/base.rb +15 -0
  44. data/lib/roo/formatters/csv.rb +84 -0
  45. data/lib/roo/formatters/matrix.rb +23 -0
  46. data/lib/roo/formatters/xml.rb +31 -0
  47. data/lib/roo/formatters/yaml.rb +40 -0
  48. data/lib/roo/libre_office.rb +4 -0
  49. data/lib/roo/link.rb +34 -0
  50. data/lib/roo/open_office.rb +626 -0
  51. data/lib/roo/spreadsheet.rb +22 -23
  52. data/lib/roo/tempdir.rb +21 -0
  53. data/lib/roo/utils.rb +78 -0
  54. data/lib/roo/version.rb +3 -0
  55. data/lib/roo.rb +23 -24
  56. data/roo.gemspec +21 -204
  57. data/spec/helpers.rb +5 -0
  58. data/spec/lib/roo/base_spec.rb +229 -3
  59. data/spec/lib/roo/csv_spec.rb +38 -11
  60. data/spec/lib/roo/excelx/format_spec.rb +7 -6
  61. data/spec/lib/roo/excelx_spec.rb +510 -11
  62. data/spec/lib/roo/libreoffice_spec.rb +16 -6
  63. data/spec/lib/roo/openoffice_spec.rb +30 -8
  64. data/spec/lib/roo/spreadsheet_spec.rb +60 -12
  65. data/spec/lib/roo/utils_spec.rb +106 -0
  66. data/spec/spec_helper.rb +7 -6
  67. data/test/all_ss.rb +12 -11
  68. data/test/excelx/cell/test_base.rb +63 -0
  69. data/test/excelx/cell/test_boolean.rb +36 -0
  70. data/test/excelx/cell/test_date.rb +38 -0
  71. data/test/excelx/cell/test_datetime.rb +45 -0
  72. data/test/excelx/cell/test_empty.rb +7 -0
  73. data/test/excelx/cell/test_number.rb +74 -0
  74. data/test/excelx/cell/test_string.rb +28 -0
  75. data/test/excelx/cell/test_time.rb +30 -0
  76. data/test/formatters/test_csv.rb +119 -0
  77. data/test/formatters/test_matrix.rb +76 -0
  78. data/test/formatters/test_xml.rb +74 -0
  79. data/test/formatters/test_yaml.rb +20 -0
  80. data/test/roo/test_csv.rb +52 -0
  81. data/test/roo/test_excelx.rb +186 -0
  82. data/test/roo/test_libre_office.rb +9 -0
  83. data/test/roo/test_open_office.rb +126 -0
  84. data/test/test_helper.rb +73 -53
  85. data/test/test_roo.rb +1211 -2292
  86. metadata +119 -298
  87. data/CHANGELOG +0 -417
  88. data/Gemfile.lock +0 -78
  89. data/README.markdown +0 -126
  90. data/VERSION +0 -1
  91. data/lib/roo/excel.rb +0 -355
  92. data/lib/roo/excel2003xml.rb +0 -300
  93. data/lib/roo/google.rb +0 -292
  94. data/lib/roo/openoffice.rb +0 -496
  95. data/lib/roo/roo_rails_helper.rb +0 -83
  96. data/lib/roo/worksheet.rb +0 -18
  97. data/scripts/txt2html +0 -67
  98. data/spec/lib/roo/excel2003xml_spec.rb +0 -15
  99. data/spec/lib/roo/excel_spec.rb +0 -17
  100. data/spec/lib/roo/google_spec.rb +0 -64
  101. data/test/files/1900_base.xls +0 -0
  102. data/test/files/1900_base.xlsx +0 -0
  103. data/test/files/1904_base.xls +0 -0
  104. data/test/files/1904_base.xlsx +0 -0
  105. data/test/files/Bibelbund.csv +0 -3741
  106. data/test/files/Bibelbund.ods +0 -0
  107. data/test/files/Bibelbund.xls +0 -0
  108. data/test/files/Bibelbund.xlsx +0 -0
  109. data/test/files/Bibelbund.xml +0 -62518
  110. data/test/files/Bibelbund1.ods +0 -0
  111. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  112. data/test/files/bad_excel_date.xls +0 -0
  113. data/test/files/bbu.ods +0 -0
  114. data/test/files/bbu.xls +0 -0
  115. data/test/files/bbu.xlsx +0 -0
  116. data/test/files/bbu.xml +0 -152
  117. data/test/files/bode-v1.ods.zip +0 -0
  118. data/test/files/bode-v1.xls.zip +0 -0
  119. data/test/files/boolean.csv +0 -2
  120. data/test/files/boolean.ods +0 -0
  121. data/test/files/boolean.xls +0 -0
  122. data/test/files/boolean.xlsx +0 -0
  123. data/test/files/boolean.xml +0 -112
  124. data/test/files/borders.ods +0 -0
  125. data/test/files/borders.xls +0 -0
  126. data/test/files/borders.xlsx +0 -0
  127. data/test/files/borders.xml +0 -144
  128. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  129. data/test/files/bug-row-column-fixnum-float.xls +0 -0
  130. data/test/files/bug-row-column-fixnum-float.xml +0 -127
  131. data/test/files/comments.ods +0 -0
  132. data/test/files/comments.xls +0 -0
  133. data/test/files/comments.xlsx +0 -0
  134. data/test/files/csvtypes.csv +0 -1
  135. data/test/files/datetime.ods +0 -0
  136. data/test/files/datetime.xls +0 -0
  137. data/test/files/datetime.xlsx +0 -0
  138. data/test/files/datetime.xml +0 -142
  139. data/test/files/datetime_floatconv.xls +0 -0
  140. data/test/files/datetime_floatconv.xml +0 -148
  141. data/test/files/dreimalvier.ods +0 -0
  142. data/test/files/emptysheets.ods +0 -0
  143. data/test/files/emptysheets.xls +0 -0
  144. data/test/files/emptysheets.xlsx +0 -0
  145. data/test/files/emptysheets.xml +0 -105
  146. data/test/files/excel2003.xml +0 -21140
  147. data/test/files/false_encoding.xls +0 -0
  148. data/test/files/false_encoding.xml +0 -132
  149. data/test/files/file_item_error.xlsx +0 -0
  150. data/test/files/formula.ods +0 -0
  151. data/test/files/formula.xls +0 -0
  152. data/test/files/formula.xlsx +0 -0
  153. data/test/files/formula.xml +0 -134
  154. data/test/files/formula_parse_error.xls +0 -0
  155. data/test/files/formula_parse_error.xml +0 -1833
  156. data/test/files/formula_string_error.xlsx +0 -0
  157. data/test/files/html-escape.ods +0 -0
  158. data/test/files/link.xls +0 -0
  159. data/test/files/link.xlsx +0 -0
  160. data/test/files/matrix.ods +0 -0
  161. data/test/files/matrix.xls +0 -0
  162. data/test/files/named_cells.ods +0 -0
  163. data/test/files/named_cells.xls +0 -0
  164. data/test/files/named_cells.xlsx +0 -0
  165. data/test/files/no_spreadsheet_file.txt +0 -1
  166. data/test/files/numbers1.csv +0 -18
  167. data/test/files/numbers1.ods +0 -0
  168. data/test/files/numbers1.xls +0 -0
  169. data/test/files/numbers1.xlsx +0 -0
  170. data/test/files/numbers1.xml +0 -312
  171. data/test/files/numeric-link.xlsx +0 -0
  172. data/test/files/only_one_sheet.ods +0 -0
  173. data/test/files/only_one_sheet.xls +0 -0
  174. data/test/files/only_one_sheet.xlsx +0 -0
  175. data/test/files/only_one_sheet.xml +0 -67
  176. data/test/files/paragraph.ods +0 -0
  177. data/test/files/paragraph.xls +0 -0
  178. data/test/files/paragraph.xlsx +0 -0
  179. data/test/files/paragraph.xml +0 -127
  180. data/test/files/prova.xls +0 -0
  181. data/test/files/ric.ods +0 -0
  182. data/test/files/simple_spreadsheet.ods +0 -0
  183. data/test/files/simple_spreadsheet.xls +0 -0
  184. data/test/files/simple_spreadsheet.xlsx +0 -0
  185. data/test/files/simple_spreadsheet.xml +0 -225
  186. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  187. data/test/files/simple_spreadsheet_from_italo.xls +0 -0
  188. data/test/files/simple_spreadsheet_from_italo.xml +0 -242
  189. data/test/files/so_datetime.csv +0 -7
  190. data/test/files/style.ods +0 -0
  191. data/test/files/style.xls +0 -0
  192. data/test/files/style.xlsx +0 -0
  193. data/test/files/style.xml +0 -154
  194. data/test/files/time-test.csv +0 -2
  195. data/test/files/time-test.ods +0 -0
  196. data/test/files/time-test.xls +0 -0
  197. data/test/files/time-test.xlsx +0 -0
  198. data/test/files/time-test.xml +0 -131
  199. data/test/files/type_excel.ods +0 -0
  200. data/test/files/type_excel.xlsx +0 -0
  201. data/test/files/type_excelx.ods +0 -0
  202. data/test/files/type_excelx.xls +0 -0
  203. data/test/files/type_openoffice.xls +0 -0
  204. data/test/files/type_openoffice.xlsx +0 -0
  205. data/test/files/whitespace.ods +0 -0
  206. data/test/files/whitespace.xls +0 -0
  207. data/test/files/whitespace.xlsx +0 -0
  208. data/test/files/whitespace.xml +0 -184
  209. data/test/rm_sub_test.rb +0 -12
  210. data/test/rm_test.rb +0 -7
  211. data/test/test_generic_spreadsheet.rb +0 -259
  212. data/website/index.html +0 -385
  213. data/website/index.txt +0 -423
  214. data/website/javascripts/rounded_corners_lite.inc.js +0 -285
  215. data/website/stylesheets/screen.css +0 -130
  216. data/website/template.rhtml +0 -48
@@ -0,0 +1,43 @@
1
+ require 'date'
2
+
3
module Roo
  class Excelx
    class Cell
      # Cell whose type is :time. Construction is delegated to the DateTime
      # cell class, after which the time-specific state is filled in.
      class Time < Roo::Excelx::Cell::DateTime
        attr_reader :value, :formula, :format, :cell_value, :link, :coordinate

        # value       - raw cell value; converted with #to_f.
        # excelx_type - collection whose last element is the format string.
        # link        - wrapped in a Roo::Link together with value when link?
        #               is truthy.
        # base_date   - handed to create_datetime together with value.
        #
        # The remaining arguments are only forwarded to the superclass.
        def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
          # Bare `super` forwards every argument unchanged to DateTime.
          super
          @type = :time
          @format = excelx_type.last
          @datetime = create_datetime(base_date, value)
          @value =
            if link?
              Roo::Link.new(link, value)
            else
              # value.to_f scaled by 86_400 and truncated to an Integer.
              (value.to_f * 86_400).to_i
            end
        end

        # Rewrites every TIME_FORMATS key found in the format string with its
        # mapped replacement and feeds the result to strftime.
        # NOTE(review): TIME_FORMATS is not defined in this file; presumably
        # it is inherited from the DateTime cell class - confirm.
        def formatted_value
          tokens = Regexp.new(TIME_FORMATS.keys.join('|'))
          strftime_pattern = @format.gsub(tokens, TIME_FORMATS)
          @datetime.strftime(strftime_pattern)
        end

        alias_method :to_s, :formatted_value
      end
    end
  end
end
@@ -0,0 +1,106 @@
1
+ require 'date'
2
+ require 'roo/excelx/cell/base'
3
+ require 'roo/excelx/cell/boolean'
4
+ require 'roo/excelx/cell/datetime'
5
+ require 'roo/excelx/cell/date'
6
+ require 'roo/excelx/cell/empty'
7
+ require 'roo/excelx/cell/number'
8
+ require 'roo/excelx/cell/string'
9
+ require 'roo/excelx/cell/time'
10
+
11
module Roo
  class Excelx
    # Legacy, deprecated cell object. Instantiating it prints a deprecation
    # warning; Cell.create_cell builds the type-specific cell classes instead.
    class Cell
      attr_reader :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
      attr_writer :value

      # DEPRECATED: Please use Cell.create_cell instead.
      #
      # value     - raw value, cast according to type (see type_cast_value).
      # type      - Symbol such as :float, :percentage, :date, :datetime, :time.
      # base_date - only retained for :date and :datetime cells.
      # hyperlink - when truthy, the cast value is wrapped in a Roo::Link.
      def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate)
        warn '[DEPRECATION] `Cell.new` is deprecated. Please use `Cell.create_cell` instead.'
        @type = type
        @formula = formula
        @base_date = base_date if [:date, :datetime].include?(@type)
        @excelx_type = excelx_type
        @excelx_value = excelx_value
        @style = style
        # Stored so the public `hyperlink` reader returns what was passed in.
        @hyperlink = hyperlink
        @value = type_cast_value(value)
        @value = Roo::Link.new(hyperlink, @value.to_s) if hyperlink
        @coordinate = coordinate
      end

      # Returns :formula when a formula is set, :link when the value is a
      # Roo::Link, otherwise the type given to the constructor.
      def type
        return :formula if @formula
        return :link if @value.is_a?(Roo::Link)

        @type
      end

      # Builds the cell class matching type, forwarding values to its
      # constructor. Returns nil for any type not listed here.
      def self.create_cell(type, *values)
        case type
        when :string
          Cell::String.new(*values)
        when :boolean
          Cell::Boolean.new(*values)
        when :number
          Cell::Number.new(*values)
        when :date
          Cell::Date.new(*values)
        when :datetime
          Cell::DateTime.new(*values)
        when :time
          Cell::Time.new(*values)
        end
      end

      # Deprecated: use Roo::Excelx::Coordinate instead.
      class Coordinate
        attr_accessor :row, :column

        def initialize(row, column)
          warn '[DEPRECATION] `Roo::Excelx::Cell::Coordinate` is deprecated. Please use `Roo::Excelx::Coordinate` instead.'
          @row, @column = row, column
        end
      end

      private

      # Converts the raw value according to @type; unknown types pass through.
      def type_cast_value(value)
        case @type
        when :float, :percentage
          value.to_f
        when :date
          create_date(@base_date + value.to_i)
        when :datetime
          create_datetime(@base_date + value.to_f.round(6))
        when :time
          value.to_f * 86_400
        else
          value
        end
      end

      # Rebuilds a plain ::Date from the year, month and day of date.
      def create_date(date)
        yyyy, mm, dd = date.strftime('%Y-%m-%d').split('-')

        ::Date.new(yyyy.to_i, mm.to_i, dd.to_i)
      end

      # Formats date with nanosecond precision, rounds it to the nearest whole
      # second and returns the result as a ::DateTime.
      def create_datetime(date)
        datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
        t = round_datetime(datetime_string)

        ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
      end

      # Parses a 'YYYY-MM-DD HH:MM:SS.fraction' string into a ::Time rounded to
      # whole seconds. The regexp literal must stay on the left of =~ so that
      # the named captures become local variables.
      def round_datetime(datetime_string)
        /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+\.\d+)/ =~ datetime_string

        ::Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0)
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  class Excelx
    # Extracts cell comments from a comments XML part (sample layout in the
    # trailing comment of this file).
    class Comments < Excelx::Extractor
      # Hash mapping the key produced by Roo::Utils.ref_to_key for each
      # comment's `ref` attribute to the comment text. Memoized.
      def comments
        @comments ||= extract_comments
      end

      private

      # Returns an empty Hash when the document does not exist. The text is
      # taken from the first ./text/r/t node, falling back to ./text/t.
      def extract_comments
        return {} unless doc_exists?

        doc.xpath('//comments/commentList/comment').each_with_object({}) do |comment, by_key|
          text_node = comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')
          key = ::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s)
          by_key[key] = text_node.text
        end
      end
    end
  end
end
23
+ # xl/comments1.xml
24
+ # <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
25
+ # <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
26
+ # <authors>
27
+ # <author />
28
+ # </authors>
29
+ # <commentList>
30
+ # <comment ref="B4" authorId="0">
31
+ # <text>
32
+ # <r>
33
+ # <rPr>
34
+ # <sz val="10" />
35
+ # <rFont val="Arial" />
36
+ # <family val="2" />
37
+ # </rPr>
38
+ # <t>Comment for B4</t>
39
+ # </r>
40
+ # </text>
41
+ # </comment>
42
+ # <comment ref="B5" authorId="0">
43
+ # <text>
44
+ # <r>
45
+ # <rPr>
46
+ # <sz val="10" />
47
+ # <rFont val="Arial" />
48
+ # <family val="2" />
49
+ # </rPr>
50
+ # <t>Comment for B5</t>
51
+ # </r>
52
+ # </text>
53
+ # </comment>
54
+ # </commentList>
55
+ # </comments>
@@ -0,0 +1,12 @@
1
module Roo
  class Excelx
    # Simple mutable holder for a cell position.
    class Coordinate
      attr_accessor :row, :column

      # Stores row and column exactly as given.
      def initialize(row, column)
        @row, @column = row, column
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module Roo
  class Excelx
    # Base class for readers that pull data out of a single XML file.
    class Extractor
      # path - location of the XML file; may be nil.
      def initialize(path)
        @path = path
      end

      private

      # Loads the XML file through Roo::Utils.load_xml and strips namespaces.
      # The file is loaded again on every call; nothing is cached here.
      # Raises FileNotFound when the path is missing or not on disk.
      def doc
        return ::Roo::Utils.load_xml(@path).remove_namespaces! if doc_exists?

        raise FileNotFound, "#{@path} file not found"
      end

      # Falsy (the path itself) when no path was given, otherwise the result
      # of File.exist? for that path.
      def doc_exists?
        return @path unless @path

        File.exist?(@path)
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
module Roo
  class Excelx
    # Maps number-format strings to roo cell type symbols.
    module Format
      # Lower-cased format strings whose type is looked up directly instead of
      # being derived from the characters they contain.
      EXCEPTIONAL_FORMATS = {
        'h:mm am/pm' => :date,
        'h:mm:ss am/pm' => :date
      }

      # Format strings keyed by numeric format id.
      STANDARD_FORMATS = {
        0 => 'General'.freeze,
        1 => '0'.freeze,
        2 => '0.00'.freeze,
        3 => '#,##0'.freeze,
        4 => '#,##0.00'.freeze,
        9 => '0%'.freeze,
        10 => '0.00%'.freeze,
        11 => '0.00E+00'.freeze,
        12 => '# ?/?'.freeze,
        13 => '# ??/??'.freeze,
        14 => 'mm-dd-yy'.freeze,
        15 => 'd-mmm-yy'.freeze,
        16 => 'd-mmm'.freeze,
        17 => 'mmm-yy'.freeze,
        18 => 'h:mm AM/PM'.freeze,
        19 => 'h:mm:ss AM/PM'.freeze,
        20 => 'h:mm'.freeze,
        21 => 'h:mm:ss'.freeze,
        22 => 'm/d/yy h:mm'.freeze,
        37 => '#,##0 ;(#,##0)'.freeze,
        38 => '#,##0 ;[Red](#,##0)'.freeze,
        39 => '#,##0.00;(#,##0.00)'.freeze,
        40 => '#,##0.00;[Red](#,##0.00)'.freeze,
        45 => 'mm:ss'.freeze,
        46 => '[h]:mm:ss'.freeze,
        47 => 'mmss.0'.freeze,
        48 => '##0.0E+0'.freeze,
        49 => '@'.freeze
      }

      # Classifies a format string (compared case-insensitively) as one of
      # :date, :datetime, :time, :percentage or :float.
      #
      # Checks run in this order: exceptional formats, '#' (float), day/year
      # markers (date, or datetime when 'h' or 's' is also present), 'h'/'s'
      # alone (time), '%' (percentage); anything else is :float.
      def to_type(format)
        fmt = format.to_s.downcase

        exceptional = EXCEPTIONAL_FORMATS[fmt]
        return exceptional if exceptional
        return :float if fmt.include?('#')

        # A run of 'd' only counts when it is not directly followed by ']'.
        has_calendar_part = !fmt.match(/d+(?![\]])/).nil? || fmt.include?('y')
        has_clock_part = fmt.include?('h') || fmt.include?('s')

        return(has_clock_part ? :datetime : :date) if has_calendar_part
        return :time if has_clock_part
        return :percentage if fmt.include?('%')

        :float
      end

      module_function :to_type
    end
  end
end
@@ -0,0 +1,25 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  class Excelx
    # Exposes the <Relationship> elements of a relationships XML file, keyed
    # by their Id attribute.
    class Relationships < Excelx::Extractor
      # Looks up the relationship node whose Id attribute equals index.
      # Returns nil when there is no such relationship.
      def [](index)
        to_a[index]
      end

      # Despite the name this returns a Hash of Id => relationship node.
      # Memoized.
      def to_a
        @relationships ||= extract_relationships
      end

      private

      # Builds the Id => node Hash. When the file does not exist an empty Hash
      # is returned rather than an Array, so lookups by Id string through #[]
      # yield nil instead of raising TypeError.
      def extract_relationships
        return {} unless doc_exists?

        doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, by_id|
          by_id[rel.attribute('Id').text] = rel
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module Roo
  class Excelx
    # Public: Shared class for allowing sheets to share data. This should
    # reduce memory usage and reduce the number of objects being passed
    # to various initializers.
    class Shared
      attr_accessor :comments_files, :sheet_files, :rels_files

      # dir - directory joined with the roo_*.xml file names below.
      def initialize(dir)
        @dir = dir
        @comments_files, @sheet_files, @rels_files = [], [], []
      end

      # Memoized Styles reader for roo_styles.xml inside dir.
      def styles
        @styles ||= Styles.new(path_in_dir('roo_styles.xml'))
      end

      # Memoized SharedStrings reader for roo_sharedStrings.xml inside dir.
      def shared_strings
        @shared_strings ||= SharedStrings.new(path_in_dir('roo_sharedStrings.xml'))
      end

      # Memoized Workbook reader for roo_workbook.xml inside dir.
      def workbook
        @workbook ||= Workbook.new(path_in_dir('roo_workbook.xml'))
      end

      # Delegates to the workbook's base_date.
      def base_date
        workbook.base_date
      end

      private

      # Joins dir and file_name into one path.
      def path_in_dir(file_name)
        File.join(@dir, file_name)
      end
    end
  end
end
@@ -0,0 +1,157 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  class Excelx
    # Reads the /sst/si entries of a shared-strings XML file, either as plain
    # strings (to_a) or as small HTML fragments that keep bold, italic,
    # underline, subscript and superscript runs (to_html).
    class SharedStrings < Excelx::Extractor

      COMMON_STRINGS = {
        t: "t",
        r: "r",
        html_tag_open: "<html>",
        html_tag_closed: "</html>"
      }

      # Plain shared string at index.
      def [](index)
        to_a[index]
      end

      # Array of plain shared strings. Memoized.
      def to_a
        @array ||= extract_shared_strings
      end

      # Array of "<html>...</html>" strings, one per shared string. Memoized.
      def to_html
        @html ||= extract_html
      end

      # Truthy (the matched tag) when the HTML form of the string at index
      # contains a <b>, <i>, <u>, <sup> or <sub> tag; nil otherwise.
      def use_html?(index)
        to_html[index][/<([biu]|sup|sub)>/]
      end

      private

      # Replaces every '_x000D_' marker in the serialized document with a
      # newline and re-parses the result. Returns doc untouched when no marker
      # is present.
      def fix_invalid_shared_strings(doc)
        invalid = { '_x000D_' => "\n" }
        pattern = /#{invalid.keys.join('|')}/
        xml = doc.to_s
        return doc unless xml[pattern]

        ::Nokogiri::XML(xml.gsub(pattern, invalid))
      end

      # Plain-text extraction: text of <t> children of <r> runs is appended,
      # while a direct <t> child replaces whatever was collected so far.
      def extract_shared_strings
        return [] unless doc_exists?

        document = fix_invalid_shared_strings(doc)
        # read the shared strings xml document
        document.xpath('/sst/si').map do |si|
          shared_string = ''
          si.children.each do |elem|
            case elem.name
            when 'r'
              elem.children.each do |r_elem|
                shared_string << r_elem.content if r_elem.name == 't'
              end
            when 't'
              shared_string = elem.content
            end
          end
          shared_string
        end
      end

      # HTML extraction. The repaired document returned by
      # fix_invalid_shared_strings is the one that gets queried, so the
      # '_x000D_' replacement also applies here and the file is loaded once.
      def extract_html
        return [] unless doc_exists?

        document = fix_invalid_shared_strings(doc)
        # read the shared strings xml document
        document.xpath('/sst/si').map do |si|
          html_string = '<html>'
          si.children.each do |elem|
            case elem.name
            when 'r'
              html_string << extract_html_r(elem)
            when 't'
              html_string << elem.content
            end
          end
          html_string << '</html>'
        end
      end

      # The goal of this function is to take the following XML code snippet and create a html tag
      # r_elem ::: XML Element that is in sharedStrings.xml of excel_book.xlsx
      # {code:xml}
      # <r>
      #   <rPr>
      #     <i/>
      #     <b/>
      #     <u/>
      #     <vertAlign val="subscript"/>
      #     <vertAlign val="superscript"/>
      #   </rPr>
      #   <t>TEXT</t>
      # </r>
      # {code}
      #
      # Expected Output ::: "<html><sub|sup><b><i><u>TEXT</u></i></b></sub|/sup></html>"
      def extract_html_r(r_elem)
        str = ''
        # Insertion order fixes the tag nesting order used by create_html.
        xml_elems = {
          sub: false,
          sup: false,
          b: false,
          i: false,
          u: false
        }
        r_elem.children.each do |elem|
          case elem.name
          when 'rPr'
            elem.children.each do |rPr_elem|
              case rPr_elem.name
              when 'b'
                xml_elems[:b] = true
              when 'i'
                xml_elems[:i] = true
              when 'u'
                xml_elems[:u] = true
              when 'vertAlign'
                # Subscript and superscript are mutually exclusive. A
                # vertAlign element without a val attribute is ignored.
                case rPr_elem['val']
                when 'subscript'
                  xml_elems[:sub] = true
                  xml_elems[:sup] = false
                when 'superscript'
                  xml_elems[:sup] = true
                  xml_elems[:sub] = false
                end
              end
            end
          when 't'
            str << create_html(elem.content, xml_elems)
          end
        end
        str
      end

      # Wraps text in an opening tag for every enabled formatting key, in the
      # hash's order, and closes them in reverse order.
      def create_html(text, formatting)
        tags = formatting.select { |_, enabled| enabled }.keys
        opening = tags.map { |tag| "<#{tag}>" }.join
        closing = tags.reverse.map { |tag| "</#{tag}>" }.join
        "#{opening}#{text}#{closing}"
      end
    end
  end
end
@@ -0,0 +1,112 @@
1
+ require 'forwardable'
2
module Roo
  class Excelx
    # One worksheet: combines the sheet document, its relationships and its
    # comments, and reads workbook-wide data through the shared object.
    class Sheet
      extend Forwardable

      delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared

      # name        - sheet name (stored only).
      # shared      - object providing rels_files, comments_files, sheet_files
      #               and the other delegated readers.
      # sheet_index - index into those three file lists.
      # options     - passed through to SheetDoc.
      def initialize(name, shared, sheet_index, options = {})
        @name = name
        @shared = shared
        @rels = Relationships.new(rels_files[sheet_index])
        @comments = Comments.new(comments_files[sheet_index])
        @sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
      end

      # All cells of the sheet as returned by the sheet document. Memoized.
      def cells
        @cells ||= @sheet.cells(@rels)
      end

      # Subset of cells that are non-nil and not empty?. Memoized.
      def present_cells
        @present_cells ||= cells.select { |_, cell| cell && !cell.empty? }
      end

      # Yield each row as array of Excelx::Cell objects
      # accepts options max_rows (int) (offset by 1 for header),
      # pad_cells (boolean) and offset (int)
      #
      # The options hash is only read; it is never modified, so frozen or
      # shared hashes are safe to pass.
      def each_row(options = {}, &block)
        offset = options[:offset] || 0
        max_rows = options[:max_rows]
        # One extra row is allowed for the header.
        stop_at = max_rows && (max_rows + offset + 1)
        row_count = 0
        @sheet.each_row_streaming do |row|
          break if stop_at && row_count == stop_at
          block.call(cells_for_row_element(row, options)) if block && row_count >= offset
          row_count += 1
        end
      end

      # Values of row row_number from first_column to last_column; nil where
      # no cell exists.
      def row(row_number)
        first_column.upto(last_column).map do |col|
          cell = cells[[row_number, col]]
          cell && cell.value
        end
      end

      # Values of column col_number from first_row to last_row; nil where no
      # cell exists.
      def column(col_number)
        first_row.upto(last_row).map do |row|
          cell = cells[[row, col_number]]
          cell && cell.value
        end
      end

      # returns the number of the first non-empty row
      def first_row
        @first_row ||= present_cells.keys.map { |row, _| row }.min
      end

      # returns the number of the last non-empty row
      def last_row
        @last_row ||= present_cells.keys.map { |row, _| row }.max
      end

      # returns the number of the first non-empty column
      def first_column
        @first_column ||= present_cells.keys.map { |_, col| col }.min
      end

      # returns the number of the last non-empty column
      def last_column
        @last_column ||= present_cells.keys.map { |_, col| col }.max
      end

      # Style format of the cell at key as a String, or nil when there is no
      # cell at that key.
      def excelx_format(key)
        cell = cells[key]
        styles.style_format(cell.style).to_s if cell
      end

      # Hyperlinks reported by the sheet document. Memoized.
      def hyperlinks
        @hyperlinks ||= @sheet.hyperlinks(@rels)
      end

      def comments
        @comments.comments
      end

      def dimensions
        @sheet.dimensions
      end

      private

      # Take an xml row and return an array of Excelx::Cell objects
      # optionally pad array to header width(assumed 1st row).
      # takes option pad_cells (boolean) defaults false
      def cells_for_row_element(row_element, options = {})
        return [] unless row_element

        previous_column = 0
        row_cells = []
        @sheet.each_cell(row_element) do |cell|
          row_cells.concat(pad_cells(cell, previous_column)) if options[:pad_cells]
          row_cells << cell
          previous_column = cell.coordinate.column
        end
        row_cells
      end

      # One nil per column skipped between last_column and the cell's column.
      def pad_cells(cell, last_column)
        pad = []
        (cell.coordinate.column - 1 - last_column).times { pad << nil }
        pad
      end
    end
  end
end