roo 1.13.2 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +7 -0
  3. data/.simplecov +4 -0
  4. data/.travis.yml +13 -0
  5. data/CHANGELOG.md +515 -0
  6. data/Gemfile +16 -10
  7. data/Guardfile +24 -0
  8. data/LICENSE +3 -1
  9. data/README.md +254 -0
  10. data/Rakefile +23 -23
  11. data/examples/roo_soap_client.rb +28 -31
  12. data/examples/roo_soap_server.rb +4 -6
  13. data/examples/write_me.rb +9 -10
  14. data/lib/roo/base.rb +334 -395
  15. data/lib/roo/csv.rb +120 -113
  16. data/lib/roo/excelx/cell.rb +77 -0
  17. data/lib/roo/excelx/comments.rb +22 -0
  18. data/lib/roo/excelx/extractor.rb +22 -0
  19. data/lib/roo/excelx/relationships.rb +25 -0
  20. data/lib/roo/excelx/shared_strings.rb +37 -0
  21. data/lib/roo/excelx/sheet.rb +107 -0
  22. data/lib/roo/excelx/sheet_doc.rb +200 -0
  23. data/lib/roo/excelx/styles.rb +64 -0
  24. data/lib/roo/excelx/workbook.rb +59 -0
  25. data/lib/roo/excelx.rb +413 -597
  26. data/lib/roo/font.rb +17 -0
  27. data/lib/roo/libre_office.rb +5 -0
  28. data/lib/roo/link.rb +15 -0
  29. data/lib/roo/{openoffice.rb → open_office.rb} +681 -496
  30. data/lib/roo/spreadsheet.rb +20 -23
  31. data/lib/roo/utils.rb +78 -0
  32. data/lib/roo/version.rb +3 -0
  33. data/lib/roo.rb +18 -24
  34. data/roo.gemspec +20 -204
  35. data/spec/lib/roo/base_spec.rb +1 -4
  36. data/spec/lib/roo/csv_spec.rb +21 -13
  37. data/spec/lib/roo/excelx/format_spec.rb +7 -6
  38. data/spec/lib/roo/excelx_spec.rb +424 -11
  39. data/spec/lib/roo/libreoffice_spec.rb +16 -6
  40. data/spec/lib/roo/openoffice_spec.rb +13 -8
  41. data/spec/lib/roo/spreadsheet_spec.rb +40 -12
  42. data/spec/lib/roo/utils_spec.rb +106 -0
  43. data/spec/spec_helper.rb +2 -1
  44. data/test/test_generic_spreadsheet.rb +117 -139
  45. data/test/test_helper.rb +9 -56
  46. data/test/test_roo.rb +274 -478
  47. metadata +65 -303
  48. data/CHANGELOG +0 -417
  49. data/Gemfile.lock +0 -78
  50. data/README.markdown +0 -126
  51. data/VERSION +0 -1
  52. data/lib/roo/excel.rb +0 -355
  53. data/lib/roo/excel2003xml.rb +0 -300
  54. data/lib/roo/google.rb +0 -292
  55. data/lib/roo/roo_rails_helper.rb +0 -83
  56. data/lib/roo/worksheet.rb +0 -18
  57. data/scripts/txt2html +0 -67
  58. data/spec/lib/roo/excel2003xml_spec.rb +0 -15
  59. data/spec/lib/roo/excel_spec.rb +0 -17
  60. data/spec/lib/roo/google_spec.rb +0 -64
  61. data/test/files/1900_base.xls +0 -0
  62. data/test/files/1900_base.xlsx +0 -0
  63. data/test/files/1904_base.xls +0 -0
  64. data/test/files/1904_base.xlsx +0 -0
  65. data/test/files/Bibelbund.csv +0 -3741
  66. data/test/files/Bibelbund.ods +0 -0
  67. data/test/files/Bibelbund.xls +0 -0
  68. data/test/files/Bibelbund.xlsx +0 -0
  69. data/test/files/Bibelbund.xml +0 -62518
  70. data/test/files/Bibelbund1.ods +0 -0
  71. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  72. data/test/files/bad_excel_date.xls +0 -0
  73. data/test/files/bbu.ods +0 -0
  74. data/test/files/bbu.xls +0 -0
  75. data/test/files/bbu.xlsx +0 -0
  76. data/test/files/bbu.xml +0 -152
  77. data/test/files/bode-v1.ods.zip +0 -0
  78. data/test/files/bode-v1.xls.zip +0 -0
  79. data/test/files/boolean.csv +0 -2
  80. data/test/files/boolean.ods +0 -0
  81. data/test/files/boolean.xls +0 -0
  82. data/test/files/boolean.xlsx +0 -0
  83. data/test/files/boolean.xml +0 -112
  84. data/test/files/borders.ods +0 -0
  85. data/test/files/borders.xls +0 -0
  86. data/test/files/borders.xlsx +0 -0
  87. data/test/files/borders.xml +0 -144
  88. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  89. data/test/files/bug-row-column-fixnum-float.xls +0 -0
  90. data/test/files/bug-row-column-fixnum-float.xml +0 -127
  91. data/test/files/comments.ods +0 -0
  92. data/test/files/comments.xls +0 -0
  93. data/test/files/comments.xlsx +0 -0
  94. data/test/files/csvtypes.csv +0 -1
  95. data/test/files/datetime.ods +0 -0
  96. data/test/files/datetime.xls +0 -0
  97. data/test/files/datetime.xlsx +0 -0
  98. data/test/files/datetime.xml +0 -142
  99. data/test/files/datetime_floatconv.xls +0 -0
  100. data/test/files/datetime_floatconv.xml +0 -148
  101. data/test/files/dreimalvier.ods +0 -0
  102. data/test/files/emptysheets.ods +0 -0
  103. data/test/files/emptysheets.xls +0 -0
  104. data/test/files/emptysheets.xlsx +0 -0
  105. data/test/files/emptysheets.xml +0 -105
  106. data/test/files/excel2003.xml +0 -21140
  107. data/test/files/false_encoding.xls +0 -0
  108. data/test/files/false_encoding.xml +0 -132
  109. data/test/files/file_item_error.xlsx +0 -0
  110. data/test/files/formula.ods +0 -0
  111. data/test/files/formula.xls +0 -0
  112. data/test/files/formula.xlsx +0 -0
  113. data/test/files/formula.xml +0 -134
  114. data/test/files/formula_parse_error.xls +0 -0
  115. data/test/files/formula_parse_error.xml +0 -1833
  116. data/test/files/formula_string_error.xlsx +0 -0
  117. data/test/files/html-escape.ods +0 -0
  118. data/test/files/link.xls +0 -0
  119. data/test/files/link.xlsx +0 -0
  120. data/test/files/matrix.ods +0 -0
  121. data/test/files/matrix.xls +0 -0
  122. data/test/files/named_cells.ods +0 -0
  123. data/test/files/named_cells.xls +0 -0
  124. data/test/files/named_cells.xlsx +0 -0
  125. data/test/files/no_spreadsheet_file.txt +0 -1
  126. data/test/files/numbers1.csv +0 -18
  127. data/test/files/numbers1.ods +0 -0
  128. data/test/files/numbers1.xls +0 -0
  129. data/test/files/numbers1.xlsx +0 -0
  130. data/test/files/numbers1.xml +0 -312
  131. data/test/files/numeric-link.xlsx +0 -0
  132. data/test/files/only_one_sheet.ods +0 -0
  133. data/test/files/only_one_sheet.xls +0 -0
  134. data/test/files/only_one_sheet.xlsx +0 -0
  135. data/test/files/only_one_sheet.xml +0 -67
  136. data/test/files/paragraph.ods +0 -0
  137. data/test/files/paragraph.xls +0 -0
  138. data/test/files/paragraph.xlsx +0 -0
  139. data/test/files/paragraph.xml +0 -127
  140. data/test/files/prova.xls +0 -0
  141. data/test/files/ric.ods +0 -0
  142. data/test/files/simple_spreadsheet.ods +0 -0
  143. data/test/files/simple_spreadsheet.xls +0 -0
  144. data/test/files/simple_spreadsheet.xlsx +0 -0
  145. data/test/files/simple_spreadsheet.xml +0 -225
  146. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  147. data/test/files/simple_spreadsheet_from_italo.xls +0 -0
  148. data/test/files/simple_spreadsheet_from_italo.xml +0 -242
  149. data/test/files/so_datetime.csv +0 -7
  150. data/test/files/style.ods +0 -0
  151. data/test/files/style.xls +0 -0
  152. data/test/files/style.xlsx +0 -0
  153. data/test/files/style.xml +0 -154
  154. data/test/files/time-test.csv +0 -2
  155. data/test/files/time-test.ods +0 -0
  156. data/test/files/time-test.xls +0 -0
  157. data/test/files/time-test.xlsx +0 -0
  158. data/test/files/time-test.xml +0 -131
  159. data/test/files/type_excel.ods +0 -0
  160. data/test/files/type_excel.xlsx +0 -0
  161. data/test/files/type_excelx.ods +0 -0
  162. data/test/files/type_excelx.xls +0 -0
  163. data/test/files/type_openoffice.xls +0 -0
  164. data/test/files/type_openoffice.xlsx +0 -0
  165. data/test/files/whitespace.ods +0 -0
  166. data/test/files/whitespace.xls +0 -0
  167. data/test/files/whitespace.xlsx +0 -0
  168. data/test/files/whitespace.xml +0 -184
  169. data/test/rm_sub_test.rb +0 -12
  170. data/test/rm_test.rb +0 -7
  171. data/website/index.html +0 -385
  172. data/website/index.txt +0 -423
  173. data/website/javascripts/rounded_corners_lite.inc.js +0 -285
  174. data/website/stylesheets/screen.css +0 -130
  175. data/website/template.rhtml +0 -48
@@ -0,0 +1,200 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  class Excelx
    # Extractor for a single worksheet XML part. Turns the sheet's <c>
    # elements into Excelx::Cell objects and exposes the sheet's hyperlinks
    # and dimension reference.
    class SheetDoc < Excelx::Extractor
      # @param path            forwarded unchanged to Excelx::Extractor#initialize
      # @param relationships   lookup indexed by hyperlink relationship id
      # @param styles          object responding to #style_format(style_index)
      # @param shared_strings  collection indexed by integer shared-string id
      # @param workbook        object responding to #base_date
      # @param options [Hash]  :expand_merged_ranges copies the top-left value
      #                        of every merged range into its other cells
      def initialize(path, relationships, styles, shared_strings, workbook, options = {})
        super(path)
        @options = options
        @relationships = relationships
        @styles = styles
        @shared_strings = shared_strings
        @workbook = workbook
      end

      # Hash of [row, col] key => Excelx::Cell for the whole sheet.
      # The result is memoized, so +relationships+ is only consulted on the
      # first call.
      def cells(relationships)
        @cells ||= extract_cells(relationships)
      end

      # Hash of [row, col] key => hyperlink target. Memoized like #cells.
      def hyperlinks(relationships)
        @hyperlinks ||= extract_hyperlinks(relationships)
      end

      # Get the dimensions for the sheet.
      # This is the upper bound of cells that might
      # be parsed. (the document may be sparse so cell count is only upper bound)
      def dimensions
        @dimensions ||= extract_dimensions
      end

      # Yield each row xml element to caller
      def each_row_streaming(&block)
        Roo::Utils.each_element(@path, 'row', &block)
      end

      # Yield each cell as Excelx::Cell to caller for given row xml.
      # Returns an empty array without yielding when +row_xml+ is nil.
      def each_cell(row_xml)
        return [] unless row_xml

        row_xml.children.each do |cell_element|
          key = ::Roo::Utils.ref_to_key(cell_element['r'])
          yield cell_from_xml(cell_element, hyperlinks(@relationships)[key])
        end
      end

      private

      # Build an Excelx::Cell from a <c> element.
      #
      # Example element:
      #   <c r="A5" s="2">
      #     <v>22606</v>
      #   </c>
      #
      # The first <is>/<t> or <v> child decides the cell's value; an <f>
      # child seen before it supplies the formula. When neither is present an
      # empty cell carrying only its coordinate is returned.
      def cell_from_xml(cell_xml, hyperlink)
        # This is error prone: to_i silently turns a missing "s" attribute
        # into 0, and it works by coincidence that Format[0] is general.
        style = cell_xml['s'].to_i

        # Only looked up for cells without an explicit "t" attribute value
        # handled below; stays nil otherwise.
        format = nil
        value_type =
          case cell_xml['t']
          when 's' then :shared
          when 'b' then :boolean
          when 'str' then :string
          when 'inlineStr' then :inlinestr
          else
            format = @styles.style_format(style)
            Excelx::Format.to_type(format)
          end

        formula = nil
        row, column = ::Roo::Utils.split_coordinate(cell_xml['r'])
        coordinate = Excelx::Cell::Coordinate.new(row, column)

        cell_xml.children.each do |cell|
          case cell.name
          when 'is'
            cell.children.each do |inline_str|
              next unless inline_str.name == 't'

              return Excelx::Cell.new(inline_str.content, :string, formula, :string,
                                      inline_str.content, style, hyperlink,
                                      @workbook.base_date, coordinate)
            end
          when 'f'
            formula = cell.content
          when 'v'
            number = cell.content.to_f
            if [:time, :datetime].include?(value_type) && number >= 1.0
              # A value of at least one whole day is a date; it is a datetime
              # only when it also has a fractional (time-of-day) part.
              value_type = (number - number.floor).abs > 0.000001 ? :datetime : :date
            end

            excelx_type = [:numeric_or_formula, format.to_s]
            value =
              case value_type
              when :shared
                value_type = :string
                excelx_type = :string
                @shared_strings[cell.content.to_i]
              when :boolean
                cell.content.to_i == 1 ? 'TRUE' : 'FALSE'
              when :date, :time, :datetime
                cell.content
              when :formula
                cell.content.to_f
              when :string
                excelx_type = :string
                cell.content
              else
                value_type = :float
                cell.content
              end

            return Excelx::Cell.new(value, value_type, formula, excelx_type, cell.content,
                                    style, hyperlink, @workbook.base_date, coordinate)
          end
        end

        Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, coordinate)
      end

      # Map each <hyperlink> that has an id known to +relationships+ to the
      # relationship's Target, keyed by the hyperlink's cell reference.
      # Hyperlinks without an id, or with an unknown id, are dropped.
      def extract_hyperlinks(relationships)
        doc.xpath('/worksheet/hyperlinks/hyperlink').each_with_object({}) do |hyperlink, links|
          id = hyperlink.attribute('id')
          relationship = id && relationships[id.text]
          next unless relationship

          key = ::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s)
          links[key] = relationship.attribute('Target').text
        end
      end

      # Duplicate the top-left value of every merged range into all other
      # cells of that range. Mutates +cells+ in place.
      def expand_merged_ranges(cells)
        # Extract merged ranges from xml
        merges = {}
        doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
          tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
          # A ref without ':' names a single cell; treat it as a 1x1 range
          # instead of failing on a nil bottom-right corner.
          br ||= tl
          (tl[0]..br[0]).each do |row|
            (tl[1]..br[1]).each do |col|
              next if row == tl[0] && col == tl[1]

              merges[[row, col]] = tl
            end
          end
        end
        # Duplicate value into all cells in merged range
        merges.each do |dst, src|
          cells[dst] = cells[src]
        end
      end

      # Parse every <c> under sheetData into a key => Excelx::Cell hash,
      # optionally expanding merged ranges (see @options).
      def extract_cells(relationships)
        extracted_cells = Hash[doc.xpath('/worksheet/sheetData/row/c').map do |cell_xml|
          key = ::Roo::Utils.ref_to_key(cell_xml['r'])
          [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
        end]

        expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges]

        extracted_cells
      end

      # Return the "ref" attribute of the first <dimension> element yielded.
      # NOTE(review): when no <dimension> element is yielded, the result is
      # whatever Roo::Utils.each_element itself returns - confirm callers cope.
      def extract_dimensions
        Roo::Utils.each_element(@path, 'dimension') do |dimension|
          return dimension.attributes['ref'].value
        end
      end

      # Sample file xl/comments1.xml
      #   <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
      #   <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
      #     <authors>
      #       <author />
      #     </authors>
      #     <commentList>
      #       <comment ref="B4" authorId="0">
      #         <text>
      #           <r>
      #             <rPr>
      #               <sz val="10" />
      #               <rFont val="Arial" />
      #               <family val="2" />
      #             </rPr>
      #             <t>Comment for B4</t>
      #           </r>
      #         </text>
      #       </comment>
      #       <comment ref="B5" authorId="0">
      #         <text>
      #           <r>
      #             <rPr>
      #               <sz val="10" />
      #               <rFont val="Arial" />
      #               <family val="2" />
      #             </rPr>
      #             <t>Comment for B5</t>
      #           </r>
      #         </text>
      #       </comment>
      #     </commentList>
      #   </comments>
      #
      # Disabled code kept for reference:
      #   if @comments_doc[self.sheets.index(sheet)]
      #     read_comments(sheet)
      #   end
    end
  end
end
@@ -0,0 +1,64 @@
1
+ require 'roo/font'
2
+ require 'roo/excelx/extractor'
3
+
4
module Roo
  class Excelx
    # Extractor for the workbook's styles part: resolves a cell's style
    # index to a number format and exposes the font used by each cell style.
    class Styles < Excelx::Extractor
      # Convert an internal excelx style index into a format: the custom
      # format registered under the style's numFmtId when there is one,
      # otherwise the entry of Excelx::Format::STANDARD_FORMATS for that id.
      def style_format(style)
        fmt_id = num_fmt_ids[style.to_i]
        custom_format = num_fmts[fmt_id]
        return custom_format if custom_format

        Excelx::Format::STANDARD_FORMATS[fmt_id.to_i]
      end

      # Fonts referenced by the children of every cellXfs element, in
      # document order. Memoized.
      def definitions
        @definitions ||= extract_definitions
      end

      private

      # Memoized numFmtId attribute values, one per cellXfs child.
      def num_fmt_ids
        @num_fmt_ids ||= extract_num_fmt_ids
      end

      # Memoized numFmtId => formatCode lookup.
      def num_fmts
        @num_fmts ||= extract_num_fmts
      end

      # Memoized list of Font objects, one per font element.
      def fonts
        @fonts ||= extract_fonts
      end

      # Look up the font of each cellXfs child through its fontId attribute.
      def extract_definitions
        collected = []
        doc.xpath('//cellXfs').each do |xfs|
          xfs.children.each do |xf|
            collected << fonts[xf['fontId'].to_i]
          end
        end
        collected
      end

      # Build one Font per font element; a flag is set when the matching
      # child element (b, i or u) is present.
      def extract_fonts
        doc.xpath('//fonts/font').map do |font_el|
          font = Font.new
          font.bold = !font_el.xpath('./b').empty?
          font.italic = !font_el.xpath('./i').empty?
          font.underline = !font_el.xpath('./u').empty?
          font
        end
      end

      # Collect the numFmtId attribute of each cellXfs child.
      def extract_num_fmt_ids
        ids = []
        doc.xpath('//cellXfs').each do |xfs|
          xfs.children.each { |xf| ids << xf['numFmtId'] }
        end
        ids
      end

      # Index every numFmt element's formatCode by its numFmtId.
      def extract_num_fmts
        doc.xpath('//numFmt').each_with_object({}) do |num_fmt, by_id|
          by_id[num_fmt['numFmtId']] = num_fmt['formatCode']
        end
      end
    end
  end
end
@@ -0,0 +1,59 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  class Excelx
    # Extractor for the workbook part: sheet list, defined names (labels)
    # and the date system used to interpret serial dates.
    class Workbook < Excelx::Extractor
      # A defined name that points at a cell of a sheet.
      class Label
        attr_reader :sheet, :row, :col, :name

        # @param name  the defined name
        # @param sheet the sheet part of the reference
        # @param row   row part of the reference; converted with to_i
        # @param col   column letters; converted with Roo::Utils.letter_to_number
        def initialize(name, sheet, row, col)
          @name = name
          @sheet = sheet
          @row = row.to_i
          @col = ::Roo::Utils.letter_to_number(col)
        end

        # The [row, col] pair for this label.
        def key
          [@row, @col]
        end
      end

      # @raise [ArgumentError] when doc_exists? is false for +path+
      def initialize(path)
        super
        fail ArgumentError, 'missing required workbook file' unless doc_exists?
      end

      # All sheet elements of the workbook document.
      def sheets
        doc.xpath('//sheet')
      end

      # aka labels
      #
      # Returns a Hash of name => Label for every definedName whose text has
      # the form "Sheet1!$C$5". Entries without a "!$" separator cannot be
      # split into sheet and coordinates; they are skipped rather than
      # raising NoMethodError on nil.
      def defined_names
        doc.xpath('//definedName').each_with_object({}) do |defined_name, labels|
          # "Sheet1!$C$5"
          sheet, coordinates = defined_name.text.split('!$', 2)
          next unless coordinates

          col, row = coordinates.split('$')
          name = defined_name['name']
          labels[name] = Label.new(name, sheet, row, col)
        end
      end

      # Base date for serial date values. Memoized.
      #
      # Default to 1900 (minus one day due to excel quirk) but use 1904 if
      # it's set in the Workbook's workbookPr
      # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
      def base_date
        @base_date ||=
          begin
            uses_1904 = doc.css('workbookPr[date1904]').any? do |workbook_pr|
              workbook_pr['date1904'] =~ /true|1/i
            end
            uses_1904 ? Date.new(1904, 1, 1) : Date.new(1899, 12, 30)
          end
      end
    end
  end
end