roo 1.13.2 → 2.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/.codeclimate.yml +17 -0
  3. data/.github/ISSUE_TEMPLATE +10 -0
  4. data/.gitignore +11 -0
  5. data/.simplecov +4 -0
  6. data/.travis.yml +17 -0
  7. data/CHANGELOG.md +626 -0
  8. data/Gemfile +17 -12
  9. data/Gemfile_ruby2 +30 -0
  10. data/Guardfile +23 -0
  11. data/LICENSE +3 -1
  12. data/README.md +285 -0
  13. data/Rakefile +23 -23
  14. data/examples/roo_soap_client.rb +28 -31
  15. data/examples/roo_soap_server.rb +4 -6
  16. data/examples/write_me.rb +9 -10
  17. data/lib/roo/base.rb +298 -495
  18. data/lib/roo/constants.rb +5 -0
  19. data/lib/roo/csv.rb +127 -113
  20. data/lib/roo/errors.rb +11 -0
  21. data/lib/roo/excelx/cell/base.rb +94 -0
  22. data/lib/roo/excelx/cell/boolean.rb +27 -0
  23. data/lib/roo/excelx/cell/date.rb +28 -0
  24. data/lib/roo/excelx/cell/datetime.rb +111 -0
  25. data/lib/roo/excelx/cell/empty.rb +19 -0
  26. data/lib/roo/excelx/cell/number.rb +87 -0
  27. data/lib/roo/excelx/cell/string.rb +19 -0
  28. data/lib/roo/excelx/cell/time.rb +43 -0
  29. data/lib/roo/excelx/cell.rb +106 -0
  30. data/lib/roo/excelx/comments.rb +55 -0
  31. data/lib/roo/excelx/coordinate.rb +12 -0
  32. data/lib/roo/excelx/extractor.rb +21 -0
  33. data/lib/roo/excelx/format.rb +64 -0
  34. data/lib/roo/excelx/relationships.rb +25 -0
  35. data/lib/roo/excelx/shared.rb +32 -0
  36. data/lib/roo/excelx/shared_strings.rb +157 -0
  37. data/lib/roo/excelx/sheet.rb +112 -0
  38. data/lib/roo/excelx/sheet_doc.rb +211 -0
  39. data/lib/roo/excelx/styles.rb +64 -0
  40. data/lib/roo/excelx/workbook.rb +59 -0
  41. data/lib/roo/excelx.rb +376 -602
  42. data/lib/roo/font.rb +17 -0
  43. data/lib/roo/formatters/base.rb +15 -0
  44. data/lib/roo/formatters/csv.rb +84 -0
  45. data/lib/roo/formatters/matrix.rb +23 -0
  46. data/lib/roo/formatters/xml.rb +31 -0
  47. data/lib/roo/formatters/yaml.rb +40 -0
  48. data/lib/roo/libre_office.rb +4 -0
  49. data/lib/roo/link.rb +34 -0
  50. data/lib/roo/open_office.rb +626 -0
  51. data/lib/roo/spreadsheet.rb +22 -23
  52. data/lib/roo/tempdir.rb +21 -0
  53. data/lib/roo/utils.rb +78 -0
  54. data/lib/roo/version.rb +3 -0
  55. data/lib/roo.rb +23 -24
  56. data/roo.gemspec +21 -204
  57. data/spec/helpers.rb +5 -0
  58. data/spec/lib/roo/base_spec.rb +229 -3
  59. data/spec/lib/roo/csv_spec.rb +38 -11
  60. data/spec/lib/roo/excelx/format_spec.rb +7 -6
  61. data/spec/lib/roo/excelx_spec.rb +510 -11
  62. data/spec/lib/roo/libreoffice_spec.rb +16 -6
  63. data/spec/lib/roo/openoffice_spec.rb +30 -8
  64. data/spec/lib/roo/spreadsheet_spec.rb +60 -12
  65. data/spec/lib/roo/utils_spec.rb +106 -0
  66. data/spec/spec_helper.rb +7 -6
  67. data/test/all_ss.rb +12 -11
  68. data/test/excelx/cell/test_base.rb +63 -0
  69. data/test/excelx/cell/test_boolean.rb +36 -0
  70. data/test/excelx/cell/test_date.rb +38 -0
  71. data/test/excelx/cell/test_datetime.rb +45 -0
  72. data/test/excelx/cell/test_empty.rb +7 -0
  73. data/test/excelx/cell/test_number.rb +74 -0
  74. data/test/excelx/cell/test_string.rb +28 -0
  75. data/test/excelx/cell/test_time.rb +30 -0
  76. data/test/formatters/test_csv.rb +119 -0
  77. data/test/formatters/test_matrix.rb +76 -0
  78. data/test/formatters/test_xml.rb +74 -0
  79. data/test/formatters/test_yaml.rb +20 -0
  80. data/test/roo/test_csv.rb +52 -0
  81. data/test/roo/test_excelx.rb +186 -0
  82. data/test/roo/test_libre_office.rb +9 -0
  83. data/test/roo/test_open_office.rb +126 -0
  84. data/test/test_helper.rb +73 -53
  85. data/test/test_roo.rb +1211 -2292
  86. metadata +119 -298
  87. data/CHANGELOG +0 -417
  88. data/Gemfile.lock +0 -78
  89. data/README.markdown +0 -126
  90. data/VERSION +0 -1
  91. data/lib/roo/excel.rb +0 -355
  92. data/lib/roo/excel2003xml.rb +0 -300
  93. data/lib/roo/google.rb +0 -292
  94. data/lib/roo/openoffice.rb +0 -496
  95. data/lib/roo/roo_rails_helper.rb +0 -83
  96. data/lib/roo/worksheet.rb +0 -18
  97. data/scripts/txt2html +0 -67
  98. data/spec/lib/roo/excel2003xml_spec.rb +0 -15
  99. data/spec/lib/roo/excel_spec.rb +0 -17
  100. data/spec/lib/roo/google_spec.rb +0 -64
  101. data/test/files/1900_base.xls +0 -0
  102. data/test/files/1900_base.xlsx +0 -0
  103. data/test/files/1904_base.xls +0 -0
  104. data/test/files/1904_base.xlsx +0 -0
  105. data/test/files/Bibelbund.csv +0 -3741
  106. data/test/files/Bibelbund.ods +0 -0
  107. data/test/files/Bibelbund.xls +0 -0
  108. data/test/files/Bibelbund.xlsx +0 -0
  109. data/test/files/Bibelbund.xml +0 -62518
  110. data/test/files/Bibelbund1.ods +0 -0
  111. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  112. data/test/files/bad_excel_date.xls +0 -0
  113. data/test/files/bbu.ods +0 -0
  114. data/test/files/bbu.xls +0 -0
  115. data/test/files/bbu.xlsx +0 -0
  116. data/test/files/bbu.xml +0 -152
  117. data/test/files/bode-v1.ods.zip +0 -0
  118. data/test/files/bode-v1.xls.zip +0 -0
  119. data/test/files/boolean.csv +0 -2
  120. data/test/files/boolean.ods +0 -0
  121. data/test/files/boolean.xls +0 -0
  122. data/test/files/boolean.xlsx +0 -0
  123. data/test/files/boolean.xml +0 -112
  124. data/test/files/borders.ods +0 -0
  125. data/test/files/borders.xls +0 -0
  126. data/test/files/borders.xlsx +0 -0
  127. data/test/files/borders.xml +0 -144
  128. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  129. data/test/files/bug-row-column-fixnum-float.xls +0 -0
  130. data/test/files/bug-row-column-fixnum-float.xml +0 -127
  131. data/test/files/comments.ods +0 -0
  132. data/test/files/comments.xls +0 -0
  133. data/test/files/comments.xlsx +0 -0
  134. data/test/files/csvtypes.csv +0 -1
  135. data/test/files/datetime.ods +0 -0
  136. data/test/files/datetime.xls +0 -0
  137. data/test/files/datetime.xlsx +0 -0
  138. data/test/files/datetime.xml +0 -142
  139. data/test/files/datetime_floatconv.xls +0 -0
  140. data/test/files/datetime_floatconv.xml +0 -148
  141. data/test/files/dreimalvier.ods +0 -0
  142. data/test/files/emptysheets.ods +0 -0
  143. data/test/files/emptysheets.xls +0 -0
  144. data/test/files/emptysheets.xlsx +0 -0
  145. data/test/files/emptysheets.xml +0 -105
  146. data/test/files/excel2003.xml +0 -21140
  147. data/test/files/false_encoding.xls +0 -0
  148. data/test/files/false_encoding.xml +0 -132
  149. data/test/files/file_item_error.xlsx +0 -0
  150. data/test/files/formula.ods +0 -0
  151. data/test/files/formula.xls +0 -0
  152. data/test/files/formula.xlsx +0 -0
  153. data/test/files/formula.xml +0 -134
  154. data/test/files/formula_parse_error.xls +0 -0
  155. data/test/files/formula_parse_error.xml +0 -1833
  156. data/test/files/formula_string_error.xlsx +0 -0
  157. data/test/files/html-escape.ods +0 -0
  158. data/test/files/link.xls +0 -0
  159. data/test/files/link.xlsx +0 -0
  160. data/test/files/matrix.ods +0 -0
  161. data/test/files/matrix.xls +0 -0
  162. data/test/files/named_cells.ods +0 -0
  163. data/test/files/named_cells.xls +0 -0
  164. data/test/files/named_cells.xlsx +0 -0
  165. data/test/files/no_spreadsheet_file.txt +0 -1
  166. data/test/files/numbers1.csv +0 -18
  167. data/test/files/numbers1.ods +0 -0
  168. data/test/files/numbers1.xls +0 -0
  169. data/test/files/numbers1.xlsx +0 -0
  170. data/test/files/numbers1.xml +0 -312
  171. data/test/files/numeric-link.xlsx +0 -0
  172. data/test/files/only_one_sheet.ods +0 -0
  173. data/test/files/only_one_sheet.xls +0 -0
  174. data/test/files/only_one_sheet.xlsx +0 -0
  175. data/test/files/only_one_sheet.xml +0 -67
  176. data/test/files/paragraph.ods +0 -0
  177. data/test/files/paragraph.xls +0 -0
  178. data/test/files/paragraph.xlsx +0 -0
  179. data/test/files/paragraph.xml +0 -127
  180. data/test/files/prova.xls +0 -0
  181. data/test/files/ric.ods +0 -0
  182. data/test/files/simple_spreadsheet.ods +0 -0
  183. data/test/files/simple_spreadsheet.xls +0 -0
  184. data/test/files/simple_spreadsheet.xlsx +0 -0
  185. data/test/files/simple_spreadsheet.xml +0 -225
  186. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  187. data/test/files/simple_spreadsheet_from_italo.xls +0 -0
  188. data/test/files/simple_spreadsheet_from_italo.xml +0 -242
  189. data/test/files/so_datetime.csv +0 -7
  190. data/test/files/style.ods +0 -0
  191. data/test/files/style.xls +0 -0
  192. data/test/files/style.xlsx +0 -0
  193. data/test/files/style.xml +0 -154
  194. data/test/files/time-test.csv +0 -2
  195. data/test/files/time-test.ods +0 -0
  196. data/test/files/time-test.xls +0 -0
  197. data/test/files/time-test.xlsx +0 -0
  198. data/test/files/time-test.xml +0 -131
  199. data/test/files/type_excel.ods +0 -0
  200. data/test/files/type_excel.xlsx +0 -0
  201. data/test/files/type_excelx.ods +0 -0
  202. data/test/files/type_excelx.xls +0 -0
  203. data/test/files/type_openoffice.xls +0 -0
  204. data/test/files/type_openoffice.xlsx +0 -0
  205. data/test/files/whitespace.ods +0 -0
  206. data/test/files/whitespace.xls +0 -0
  207. data/test/files/whitespace.xlsx +0 -0
  208. data/test/files/whitespace.xml +0 -184
  209. data/test/rm_sub_test.rb +0 -12
  210. data/test/rm_test.rb +0 -7
  211. data/test/test_generic_spreadsheet.rb +0 -259
  212. data/website/index.html +0 -385
  213. data/website/index.txt +0 -423
  214. data/website/javascripts/rounded_corners_lite.inc.js +0 -285
  215. data/website/stylesheets/screen.css +0 -130
  216. data/website/template.rhtml +0 -48
@@ -0,0 +1,43 @@
1
+ require 'date'
2
+
3
module Roo
  class Excelx
    class Cell
      # Cell whose type is reported as :time.
      #
      # Builds on Cell::DateTime: every constructor argument is forwarded to
      # the superclass, after which the time-specific state is filled in.
      # `link?`, `create_datetime` and `TIME_FORMATS` are not defined in this
      # class; presumably they come from the superclass chain (verify there).
      class Time < Roo::Excelx::Cell::DateTime
        attr_reader :value, :formula, :format, :cell_value, :link, :coordinate

        # value       - raw cell value; converted with to_f.
        # formula     - forwarded to the superclass.
        # excelx_type - collection whose last element is used as the format.
        # style       - forwarded to the superclass.
        # link        - when `link?` is truthy the value is wrapped in Roo::Link.
        # base_date   - passed to create_datetime together with value.
        # coordinate  - forwarded to the superclass.
        def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
          # Bare `super` forwards all seven arguments unchanged.
          super
          @type = :time
          @format = excelx_type.last
          @datetime = create_datetime(base_date, value)
          @value =
            if link?
              Roo::Link.new(link, value)
            else
              # value scaled by 86_400 (seconds per day), truncated to Integer.
              (value.to_f * 86_400).to_i
            end
        end

        # Returns the stored datetime rendered through strftime, after every
        # TIME_FORMATS key found in the cell format has been replaced by its
        # mapped value.
        def formatted_value
          token_pattern = /#{TIME_FORMATS.keys.join('|')}/
          strftime_pattern = @format.gsub(token_pattern, TIME_FORMATS)
          @datetime.strftime(strftime_pattern)
        end

        alias to_s formatted_value

        private

        # NOTE(review): this class used to carry commented-out copies of
        # create_datetime / round_datetime here; the live versions are
        # presumably inherited from Cell::DateTime -- confirm before relying
        # on their exact rounding behaviour.
      end
    end
  end
end
@@ -0,0 +1,106 @@
1
+ require 'date'
2
+ require 'roo/excelx/cell/base'
3
+ require 'roo/excelx/cell/boolean'
4
+ require 'roo/excelx/cell/datetime'
5
+ require 'roo/excelx/cell/date'
6
+ require 'roo/excelx/cell/empty'
7
+ require 'roo/excelx/cell/number'
8
+ require 'roo/excelx/cell/string'
9
+ require 'roo/excelx/cell/time'
10
+
11
module Roo
  class Excelx
    # Legacy generic cell. Constructing it directly is deprecated; use
    # Cell.create_cell, which builds one of the typed Cell::* classes.
    class Cell
      attr_reader :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
      attr_writer :value

      # DEPRECATED: Please use Cell.create_cell instead.
      #
      # value        - raw value; cast according to type (see type_cast_value).
      # type         - Symbol such as :float, :percentage, :date, :datetime, :time.
      # formula      - formula text or nil; when set, #type reports :formula.
      # excelx_type  - stored as-is.
      # excelx_value - stored as-is.
      # style        - stored as-is.
      # hyperlink    - when truthy, the value is wrapped in a Roo::Link.
      # base_date    - date that :date / :datetime values are offset from.
      # coordinate   - stored as-is.
      def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate)
        warn '[DEPRECATION] `Cell.new` is deprecated. Please use `Cell.create_cell` instead.'
        @type = type
        @formula = formula
        @base_date = base_date if [:date, :datetime].include?(@type)
        @excelx_type = excelx_type
        @excelx_value = excelx_value
        @style = style
        # Keep the raw hyperlink so the `hyperlink` reader does not always
        # answer nil.
        @hyperlink = hyperlink
        @value = type_cast_value(value)
        @value = Roo::Link.new(hyperlink, @value.to_s) if hyperlink
        @coordinate = coordinate
      end

      # Returns :formula when a formula is present, :link when the value is a
      # Roo::Link, otherwise the type given to the constructor.
      def type
        if @formula
          :formula
        elsif @value.is_a?(Roo::Link)
          :link
        else
          @type
        end
      end

      # Builds the typed cell class matching `type`, passing `values` straight
      # to its constructor. Returns nil for any type not listed here.
      def self.create_cell(type, *values)
        case type
        when :string   then Cell::String.new(*values)
        when :boolean  then Cell::Boolean.new(*values)
        when :number   then Cell::Number.new(*values)
        when :date     then Cell::Date.new(*values)
        when :datetime then Cell::DateTime.new(*values)
        when :time     then Cell::Time.new(*values)
        end
      end

      # Deprecated: use Roo::Excelx::Coordinate instead.
      class Coordinate
        attr_accessor :row, :column

        def initialize(row, column)
          # The message names this class correctly (Excelx, not Excel).
          warn '[DEPRECATION] `Roo::Excelx::Cell::Coordinate` is deprecated. Please use `Roo::Excelx::Coordinate` instead.'
          @row, @column = row, column
        end
      end

      private

      # Converts the raw value according to @type; unknown types pass through.
      def type_cast_value(value)
        case @type
        when :float, :percentage
          value.to_f
        when :date
          create_date(@base_date + value.to_i)
        when :datetime
          create_datetime(@base_date + value.to_f.round(6))
        when :time
          value.to_f * 86_400
        else
          value
        end
      end

      # Returns a plain ::Date carrying only the calendar fields of `date`.
      def create_date(date)
        ::Date.new(date.year, date.month, date.day)
      end

      # Returns a ::DateTime for `date`, rounded to the nearest whole second.
      def create_datetime(date)
        datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
        t = round_datetime(datetime_string)

        ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
      end

      # Parses "YYYY-MM-DD HH:MM:SS.NNN" and rounds it to whole seconds.
      #
      # The ::Time is built in UTC on purpose: spreadsheet values are wall-clock
      # readings without a zone, and building them in the process-local zone
      # shifts any reading that falls into a DST gap (e.g. 02:30 on the
      # spring-forward day would come back as 03:30).
      def round_datetime(datetime_string)
        /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+\.\d+)/ =~ datetime_string

        ::Time.utc(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0)
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  class Excelx
    # Reads the comments part of a workbook (see the sample xl/comments1.xml
    # layout kept below this class in the file).
    class Comments < Excelx::Extractor
      # Returns the extracted comments, computing them on first use.
      def comments
        @comments ||= extract_comments
      end

      private

      # Builds a Hash of `Roo::Utils.ref_to_key(ref)` => comment text.
      #
      # Returns an empty Hash when the comments file does not exist. For each
      # <comment>, the text is taken from the first ./text/r/t node, falling
      # back to ./text/t.
      def extract_comments
        return {} unless doc_exists?

        pairs = doc.xpath('//comments/commentList/comment').map do |node|
          text_node = node.at_xpath('./text/r/t') || node.at_xpath('./text/t')
          value = text_node.text
          key = ::Roo::Utils.ref_to_key(node.attributes['ref'].to_s)
          [key, value]
        end
        Hash[pairs]
      end
    end
  end
end
23
+ # xl/comments1.xml
24
+ # <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
25
+ # <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
26
+ # <authors>
27
+ # <author />
28
+ # </authors>
29
+ # <commentList>
30
+ # <comment ref="B4" authorId="0">
31
+ # <text>
32
+ # <r>
33
+ # <rPr>
34
+ # <sz val="10" />
35
+ # <rFont val="Arial" />
36
+ # <family val="2" />
37
+ # </rPr>
38
+ # <t>Comment for B4</t>
39
+ # </r>
40
+ # </text>
41
+ # </comment>
42
+ # <comment ref="B5" authorId="0">
43
+ # <text>
44
+ # <r>
45
+ # <rPr>
46
+ # <sz val="10" />
47
+ # <rFont val="Arial" />
48
+ # <family val="2" />
49
+ # </rPr>
50
+ # <t>Comment for B5</t>
51
+ # </r>
52
+ # </text>
53
+ # </comment>
54
+ # </commentList>
55
+ # </comments>
@@ -0,0 +1,12 @@
1
module Roo
  class Excelx
    # Plain holder for a cell position; both fields are readable and writable.
    class Coordinate
      attr_accessor :row, :column

      # row    - value stored as the row.
      # column - value stored as the column.
      def initialize(row, column)
        @row, @column = row, column
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module Roo
  class Excelx
    # Base class for readers that pull data out of one XML file on disk.
    class Extractor
      # path - location of the XML file; may be nil when the part is absent.
      def initialize(path)
        @path = path
      end

      private

      # Loads the XML at @path through Roo::Utils.load_xml and strips its
      # namespaces. The file is loaded on every call (no caching here).
      #
      # Raises FileNotFound when the path is missing or not on disk.
      def doc
        unless doc_exists?
          raise FileNotFound, "#{@path} file not found"
        end

        xml = ::Roo::Utils.load_xml(@path)
        xml.remove_namespaces!
      end

      # Falsy when no path was given, otherwise whether the file exists.
      def doc_exists?
        return @path unless @path

        File.exist?(@path)
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
module Roo
  class Excelx
    # Maps spreadsheet number-format codes to the cell type symbols used by
    # the Excelx reader.
    module Format
      # Whole-format overrides, looked up on the downcased format string
      # before any other rule is applied.
      EXCEPTIONAL_FORMATS = {
        'h:mm am/pm' => :date,
        'h:mm:ss am/pm' => :date
      }

      # Built-in numFmtId => format code table.
      STANDARD_FORMATS = {
        0 => 'General'.freeze,
        1 => '0'.freeze,
        2 => '0.00'.freeze,
        3 => '#,##0'.freeze,
        4 => '#,##0.00'.freeze,
        9 => '0%'.freeze,
        10 => '0.00%'.freeze,
        11 => '0.00E+00'.freeze,
        12 => '# ?/?'.freeze,
        13 => '# ??/??'.freeze,
        14 => 'mm-dd-yy'.freeze,
        15 => 'd-mmm-yy'.freeze,
        16 => 'd-mmm'.freeze,
        17 => 'mmm-yy'.freeze,
        18 => 'h:mm AM/PM'.freeze,
        19 => 'h:mm:ss AM/PM'.freeze,
        20 => 'h:mm'.freeze,
        21 => 'h:mm:ss'.freeze,
        22 => 'm/d/yy h:mm'.freeze,
        37 => '#,##0 ;(#,##0)'.freeze,
        38 => '#,##0 ;[Red](#,##0)'.freeze,
        39 => '#,##0.00;(#,##0.00)'.freeze,
        40 => '#,##0.00;[Red](#,##0.00)'.freeze,
        45 => 'mm:ss'.freeze,
        46 => '[h]:mm:ss'.freeze,
        47 => 'mmss.0'.freeze,
        48 => '##0.0E+0'.freeze,
        49 => '@'.freeze
      }

      # Parts of a (downcased) format code that are literal text or display
      # hints rather than pattern letters: "quoted text", backslash-escaped
      # characters, colour tags such as [red] / [color 3], and [$...]
      # currency/locale tags. Elapsed-time tokens like [h] are NOT matched.
      NON_PATTERN_TEXT =
        /"[^"]*"|\\.|\[(?:\$[^\]]*|black|blue|cyan|green|magenta|red|white|yellow|color\s*\d+)\]/

      # Classifies a format code.
      #
      # format - format code (anything responding to to_s; nil is treated as '').
      #
      # Returns one of :float, :date, :datetime, :time, :percentage.
      #
      # Literal sections are removed before the letters are inspected, so that
      # e.g. `0.00" USD"` or `0.00;[Yellow]-0.00` are not mistaken for date or
      # time formats because of the d / y / h / s inside text or colour names.
      # The EXCEPTIONAL_FORMATS lookup deliberately uses the unstripped string
      # so formats like `[$-409]h:mm:ss AM/PM` keep resolving to :time.
      def to_type(format)
        format = format.to_s.downcase
        exceptional = EXCEPTIONAL_FORMATS[format]
        return exceptional if exceptional

        code = format.gsub(NON_PATTERN_TEXT, '')
        has_time_part = code.include?('h') || code.include?('s')

        if code.include?('#')
          :float
        elsif code =~ /d+(?![\]])/ || code.include?('y')
          has_time_part ? :datetime : :date
        elsif has_time_part
          :time
        elsif code.include?('%')
          :percentage
        else
          :float
        end
      end

      module_function :to_type
    end
  end
end
@@ -0,0 +1,25 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  class Excelx
    # Reads a relationships (.rels) part and exposes its <Relationship>
    # nodes keyed by their Id attribute.
    class Relationships < Excelx::Extractor
      # index - relationship Id (String), e.g. the r:id found on a hyperlink.
      #
      # Returns the matching <Relationship> node, or nil when there is none.
      def [](index)
        to_a[index]
      end

      # Returns the Id => node Hash, extracting it on first use.
      # NOTE: despite its name this has always produced a Hash when the
      # file exists.
      def to_a
        @relationships ||= extract_relationships
      end

      private

      # Returns a Hash of Id => <Relationship> node.
      #
      # When the rels file is absent an empty Hash is returned -- the same
      # container type as the normal path -- so that `relationships['rId1']`
      # answers nil instead of raising TypeError on an Array.
      def extract_relationships
        return {} unless doc_exists?

        doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, by_id|
          by_id[rel.attribute('Id').text] = rel
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module Roo
  class Excelx
    # Public: Shared class for allowing sheets to share data. This should
    # reduce memory usage and reduce the number of objects being passed
    # to various initializers.
    class Shared
      attr_accessor :comments_files, :sheet_files, :rels_files

      # dir - directory that holds the extracted roo_*.xml files.
      def initialize(dir)
        @dir = dir
        @comments_files, @sheet_files, @rels_files = [], [], []
      end

      # Styles reader for roo_styles.xml, created on first use.
      def styles
        @styles ||= Styles.new(extracted_file('roo_styles.xml'))
      end

      # SharedStrings reader for roo_sharedStrings.xml, created on first use.
      def shared_strings
        @shared_strings ||= SharedStrings.new(extracted_file('roo_sharedStrings.xml'))
      end

      # Workbook reader for roo_workbook.xml, created on first use.
      def workbook
        @workbook ||= Workbook.new(extracted_file('roo_workbook.xml'))
      end

      # Returns whatever the workbook reports as its base date.
      def base_date
        workbook.base_date
      end

      private

      # Joins the shared directory with the given file name.
      def extracted_file(file_name)
        File.join(@dir, file_name)
      end
    end
  end
end
@@ -0,0 +1,157 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  class Excelx
    # Reads the shared strings part of a workbook and exposes each <si>
    # entry either as plain text (#to_a / #[]) or as a small HTML fragment
    # that keeps bold / italic / underline / sub / sup runs (#to_html).
    class SharedStrings < Excelx::Extractor

      COMMON_STRINGS = {
        t: "t",
        r: "r",
        html_tag_open: "<html>",
        html_tag_closed: "</html>"
      }

      # Returns the plain-text shared string at `index`.
      def [](index)
        to_a[index]
      end

      # Returns all shared strings as plain text, extracted on first use.
      def to_a
        @array ||= extract_shared_strings
      end

      # Returns all shared strings as HTML fragments, extracted on first use.
      def to_html
        @html ||= extract_html
      end

      # Use to_html or to_a for html returns
      # Returns the first formatting tag found in the HTML form of the string
      # at `index` (truthy), or nil when it carries no b/i/u/sup/sub markup.
      def use_html?(index)
        to_html[index][/<([biu]|sup|sub)>/]
      end

      private

      # Replaces the literal `_x000D_` sequence with a newline.
      #
      # Returns `doc` itself when nothing needs fixing, otherwise a document
      # re-parsed from the corrected XML text.
      def fix_invalid_shared_strings(doc)
        invalid = { '_x000D_' => "\n" }
        invalid_pattern = /#{invalid.keys.join('|')}/
        xml = doc.to_s
        return doc unless xml[invalid_pattern]

        ::Nokogiri::XML(xml.gsub(invalid_pattern, invalid))
      end

      # Returns an Array with one plain String per /sst/si element, or []
      # when the shared strings file does not exist.
      def extract_shared_strings
        return [] unless doc_exists?

        document = fix_invalid_shared_strings(doc)
        # read the shared strings xml document
        document.xpath('/sst/si').map do |si|
          shared_string = ''
          si.children.each do |elem|
            case elem.name
            when 'r'
              # rich-text run: append the text of each <t> child
              elem.children.each do |r_elem|
                shared_string << r_elem.content if r_elem.name == 't'
              end
            when 't'
              # plain entry: the <t> content replaces whatever was collected
              shared_string = elem.content
            end
          end
          shared_string
        end
      end

      # Returns an Array with one "<html>…</html>" String per /sst/si
      # element, or [] when the shared strings file does not exist.
      def extract_html
        return [] unless doc_exists?

        # Work on the repaired document. The repair result used to be thrown
        # away here, so `_x000D_` leaked into the HTML output and the file
        # was parsed a second time.
        document = fix_invalid_shared_strings(doc)
        # read the shared strings xml document
        document.xpath('/sst/si').map do |si|
          html_string = '<html>'
          si.children.each do |elem|
            case elem.name
            when 'r'
              html_string << extract_html_r(elem)
            when 't'
              html_string << elem.content
            end
          end
          html_string << '</html>'
        end
      end

      # The goal of this function is to take the following XML code snippet and create a html tag
      # r_elem ::: XML Element that is in sharedStrings.xml of excel_book.xlsx
      # {code:xml}
      # <r>
      #   <rPr>
      #     <i/>
      #     <b/>
      #     <u/>
      #     <vertAlign val="subscript"/>
      #     <vertAlign val="superscript"/>
      #   </rPr>
      #   <t>TEXT</t>
      # </r>
      # {code}
      #
      # Expected Output ::: "<html><sub|sup><b><i><u>TEXT</u></i></b></sub|/sup></html>"
      def extract_html_r(r_elem)
        str = ''
        # Insertion order matters: create_html opens tags in this order and
        # closes them in reverse.
        xml_elems = {
          sub: false,
          sup: false,
          b: false,
          i: false,
          u: false
        }
        r_elem.children.each do |elem|
          case elem.name
          when 'rPr'
            elem.children.each do |rPr_elem|
              case rPr_elem.name
              when 'b'
                # set formatting for Bold to true
                xml_elems[:b] = true
              when 'i'
                # set formatting for Italics to true
                xml_elems[:i] = true
              when 'u'
                # set formatting for Underline to true
                xml_elems[:u] = true
              when 'vertAlign'
                # See if the Vertical Alignment is subscript or superscript
                case rPr_elem.xpath('@val').first.value
                when 'subscript'
                  # set formatting for Subscript to true and Superscript to false ... Can't have both
                  xml_elems[:sub] = true
                  xml_elems[:sup] = false
                when 'superscript'
                  # set formatting for Superscript to true and Subscript to false ... Can't have both
                  xml_elems[:sup] = true
                  xml_elems[:sub] = false
                end
              end
            end
          when 't'
            str << create_html(elem.content, xml_elems)
          end
        end
        str
      end

      # Wraps `text` in one tag per enabled entry of `formatting`
      # (a Hash of tag name => boolean), closing the tags in reverse order.
      def create_html(text, formatting)
        tmp_str = ''
        formatting.each do |elem, val|
          tmp_str << "<#{elem}>" if val
        end
        tmp_str << text
        formatting.to_a.reverse_each do |elem, val|
          tmp_str << "</#{elem}>" if val
        end
        tmp_str
      end
    end # class SharedStrings < Excelx::Extractor
  end # class Excelx
end # module Roo
@@ -0,0 +1,112 @@
1
+ require 'forwardable'
2
module Roo
  class Excelx
    # One worksheet of an xlsx workbook: ties together the sheet document,
    # its relationships and its comments, and borrows workbook-wide data
    # (styles, shared strings, file lists) from the Shared object.
    class Sheet
      extend Forwardable

      delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared

      # name        - sheet name.
      # shared      - Shared instance holding workbook-wide data.
      # sheet_index - position used to pick this sheet's entry out of the
      #               shared rels/comments/sheet file lists.
      # options     - passed through to SheetDoc.
      def initialize(name, shared, sheet_index, options = {})
        @name = name
        @shared = shared
        @rels = Relationships.new(rels_files[sheet_index])
        @comments = Comments.new(comments_files[sheet_index])
        @sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
      end

      # All cells of the sheet document, loaded on first use.
      def cells
        @cells ||= @sheet.cells(@rels)
      end

      # Subset of #cells whose cell is present and not empty.
      def present_cells
        @present_cells ||= cells.select { |_, cell| cell && !cell.empty? }
      end

      # Yield each row as array of Excelx::Cell objects
      # accepts options max_rows (int) (offset by 1 for header),
      # pad_cells (boolean) and offset (int)
      #
      # The options hash is only read, never modified, so callers may reuse
      # it or pass a frozen hash.
      def each_row(options = {}, &block)
        offset = options[:offset] || 0
        max_rows = options[:max_rows]
        # Index of the first row that must not be yielded any more.
        stop_at = max_rows && max_rows + offset + 1
        row_count = 0

        @sheet.each_row_streaming do |row|
          break if stop_at && row_count == stop_at

          block.call(cells_for_row_element(row, options)) if block && row_count >= offset
          row_count += 1
        end
      end

      # Values of the given row, from the first to the last non-empty column
      # (nil where a cell is missing).
      def row(row_number)
        first_column.upto(last_column).map do |col|
          cell = cells[[row_number, col]]
          cell && cell.value
        end
      end

      # Values of the given column, from the first to the last non-empty row
      # (nil where a cell is missing).
      def column(col_number)
        first_row.upto(last_row).map do |row|
          cell = cells[[row, col_number]]
          cell && cell.value
        end
      end

      # returns the number of the first non-empty row
      def first_row
        @first_row ||= present_cells.keys.map { |row, _| row }.min
      end

      # returns the number of the last non-empty row
      def last_row
        @last_row ||= present_cells.keys.map { |row, _| row }.max
      end

      # returns the number of the first non-empty column
      def first_column
        @first_column ||= present_cells.keys.map { |_, col| col }.min
      end

      # returns the number of the last non-empty column
      def last_column
        @last_column ||= present_cells.keys.map { |_, col| col }.max
      end

      # Format string of the cell stored under `key`, or nil when there is
      # no such cell.
      def excelx_format(key)
        cell = cells[key]
        styles.style_format(cell.style).to_s if cell
      end

      # Hyperlinks of the sheet document, loaded on first use.
      def hyperlinks
        @hyperlinks ||= @sheet.hyperlinks(@rels)
      end

      def comments
        @comments.comments
      end

      def dimensions
        @sheet.dimensions
      end

      private

      # Take an xml row and return an array of Excelx::Cell objects
      # optionally pad array to header width(assumed 1st row).
      # takes option pad_cells (boolean) defaults false
      def cells_for_row_element(row_element, options = {})
        return [] unless row_element

        previous_column = 0
        row_cells = []
        @sheet.each_cell(row_element) do |cell|
          row_cells.concat(pad_cells(cell, previous_column)) if options[:pad_cells]
          row_cells << cell
          previous_column = cell.coordinate.column
        end
        row_cells
      end

      # Returns one nil per column skipped between `last_column` and the
      # column of `cell`.
      def pad_cells(cell, last_column)
        gap = cell.coordinate.column - 1 - last_column
        gap > 0 ? Array.new(gap) : []
      end
    end
  end
end