roo 1.13.2 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (175) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +7 -0
  3. data/.simplecov +4 -0
  4. data/.travis.yml +13 -0
  5. data/CHANGELOG.md +515 -0
  6. data/Gemfile +16 -10
  7. data/Guardfile +24 -0
  8. data/LICENSE +3 -1
  9. data/README.md +254 -0
  10. data/Rakefile +23 -23
  11. data/examples/roo_soap_client.rb +28 -31
  12. data/examples/roo_soap_server.rb +4 -6
  13. data/examples/write_me.rb +9 -10
  14. data/lib/roo/base.rb +334 -395
  15. data/lib/roo/csv.rb +120 -113
  16. data/lib/roo/excelx/cell.rb +77 -0
  17. data/lib/roo/excelx/comments.rb +22 -0
  18. data/lib/roo/excelx/extractor.rb +22 -0
  19. data/lib/roo/excelx/relationships.rb +25 -0
  20. data/lib/roo/excelx/shared_strings.rb +37 -0
  21. data/lib/roo/excelx/sheet.rb +107 -0
  22. data/lib/roo/excelx/sheet_doc.rb +200 -0
  23. data/lib/roo/excelx/styles.rb +64 -0
  24. data/lib/roo/excelx/workbook.rb +59 -0
  25. data/lib/roo/excelx.rb +413 -597
  26. data/lib/roo/font.rb +17 -0
  27. data/lib/roo/libre_office.rb +5 -0
  28. data/lib/roo/link.rb +15 -0
  29. data/lib/roo/{openoffice.rb → open_office.rb} +681 -496
  30. data/lib/roo/spreadsheet.rb +20 -23
  31. data/lib/roo/utils.rb +78 -0
  32. data/lib/roo/version.rb +3 -0
  33. data/lib/roo.rb +18 -24
  34. data/roo.gemspec +20 -204
  35. data/spec/lib/roo/base_spec.rb +1 -4
  36. data/spec/lib/roo/csv_spec.rb +21 -13
  37. data/spec/lib/roo/excelx/format_spec.rb +7 -6
  38. data/spec/lib/roo/excelx_spec.rb +424 -11
  39. data/spec/lib/roo/libreoffice_spec.rb +16 -6
  40. data/spec/lib/roo/openoffice_spec.rb +13 -8
  41. data/spec/lib/roo/spreadsheet_spec.rb +40 -12
  42. data/spec/lib/roo/utils_spec.rb +106 -0
  43. data/spec/spec_helper.rb +2 -1
  44. data/test/test_generic_spreadsheet.rb +117 -139
  45. data/test/test_helper.rb +9 -56
  46. data/test/test_roo.rb +274 -478
  47. metadata +65 -303
  48. data/CHANGELOG +0 -417
  49. data/Gemfile.lock +0 -78
  50. data/README.markdown +0 -126
  51. data/VERSION +0 -1
  52. data/lib/roo/excel.rb +0 -355
  53. data/lib/roo/excel2003xml.rb +0 -300
  54. data/lib/roo/google.rb +0 -292
  55. data/lib/roo/roo_rails_helper.rb +0 -83
  56. data/lib/roo/worksheet.rb +0 -18
  57. data/scripts/txt2html +0 -67
  58. data/spec/lib/roo/excel2003xml_spec.rb +0 -15
  59. data/spec/lib/roo/excel_spec.rb +0 -17
  60. data/spec/lib/roo/google_spec.rb +0 -64
  61. data/test/files/1900_base.xls +0 -0
  62. data/test/files/1900_base.xlsx +0 -0
  63. data/test/files/1904_base.xls +0 -0
  64. data/test/files/1904_base.xlsx +0 -0
  65. data/test/files/Bibelbund.csv +0 -3741
  66. data/test/files/Bibelbund.ods +0 -0
  67. data/test/files/Bibelbund.xls +0 -0
  68. data/test/files/Bibelbund.xlsx +0 -0
  69. data/test/files/Bibelbund.xml +0 -62518
  70. data/test/files/Bibelbund1.ods +0 -0
  71. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  72. data/test/files/bad_excel_date.xls +0 -0
  73. data/test/files/bbu.ods +0 -0
  74. data/test/files/bbu.xls +0 -0
  75. data/test/files/bbu.xlsx +0 -0
  76. data/test/files/bbu.xml +0 -152
  77. data/test/files/bode-v1.ods.zip +0 -0
  78. data/test/files/bode-v1.xls.zip +0 -0
  79. data/test/files/boolean.csv +0 -2
  80. data/test/files/boolean.ods +0 -0
  81. data/test/files/boolean.xls +0 -0
  82. data/test/files/boolean.xlsx +0 -0
  83. data/test/files/boolean.xml +0 -112
  84. data/test/files/borders.ods +0 -0
  85. data/test/files/borders.xls +0 -0
  86. data/test/files/borders.xlsx +0 -0
  87. data/test/files/borders.xml +0 -144
  88. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  89. data/test/files/bug-row-column-fixnum-float.xls +0 -0
  90. data/test/files/bug-row-column-fixnum-float.xml +0 -127
  91. data/test/files/comments.ods +0 -0
  92. data/test/files/comments.xls +0 -0
  93. data/test/files/comments.xlsx +0 -0
  94. data/test/files/csvtypes.csv +0 -1
  95. data/test/files/datetime.ods +0 -0
  96. data/test/files/datetime.xls +0 -0
  97. data/test/files/datetime.xlsx +0 -0
  98. data/test/files/datetime.xml +0 -142
  99. data/test/files/datetime_floatconv.xls +0 -0
  100. data/test/files/datetime_floatconv.xml +0 -148
  101. data/test/files/dreimalvier.ods +0 -0
  102. data/test/files/emptysheets.ods +0 -0
  103. data/test/files/emptysheets.xls +0 -0
  104. data/test/files/emptysheets.xlsx +0 -0
  105. data/test/files/emptysheets.xml +0 -105
  106. data/test/files/excel2003.xml +0 -21140
  107. data/test/files/false_encoding.xls +0 -0
  108. data/test/files/false_encoding.xml +0 -132
  109. data/test/files/file_item_error.xlsx +0 -0
  110. data/test/files/formula.ods +0 -0
  111. data/test/files/formula.xls +0 -0
  112. data/test/files/formula.xlsx +0 -0
  113. data/test/files/formula.xml +0 -134
  114. data/test/files/formula_parse_error.xls +0 -0
  115. data/test/files/formula_parse_error.xml +0 -1833
  116. data/test/files/formula_string_error.xlsx +0 -0
  117. data/test/files/html-escape.ods +0 -0
  118. data/test/files/link.xls +0 -0
  119. data/test/files/link.xlsx +0 -0
  120. data/test/files/matrix.ods +0 -0
  121. data/test/files/matrix.xls +0 -0
  122. data/test/files/named_cells.ods +0 -0
  123. data/test/files/named_cells.xls +0 -0
  124. data/test/files/named_cells.xlsx +0 -0
  125. data/test/files/no_spreadsheet_file.txt +0 -1
  126. data/test/files/numbers1.csv +0 -18
  127. data/test/files/numbers1.ods +0 -0
  128. data/test/files/numbers1.xls +0 -0
  129. data/test/files/numbers1.xlsx +0 -0
  130. data/test/files/numbers1.xml +0 -312
  131. data/test/files/numeric-link.xlsx +0 -0
  132. data/test/files/only_one_sheet.ods +0 -0
  133. data/test/files/only_one_sheet.xls +0 -0
  134. data/test/files/only_one_sheet.xlsx +0 -0
  135. data/test/files/only_one_sheet.xml +0 -67
  136. data/test/files/paragraph.ods +0 -0
  137. data/test/files/paragraph.xls +0 -0
  138. data/test/files/paragraph.xlsx +0 -0
  139. data/test/files/paragraph.xml +0 -127
  140. data/test/files/prova.xls +0 -0
  141. data/test/files/ric.ods +0 -0
  142. data/test/files/simple_spreadsheet.ods +0 -0
  143. data/test/files/simple_spreadsheet.xls +0 -0
  144. data/test/files/simple_spreadsheet.xlsx +0 -0
  145. data/test/files/simple_spreadsheet.xml +0 -225
  146. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  147. data/test/files/simple_spreadsheet_from_italo.xls +0 -0
  148. data/test/files/simple_spreadsheet_from_italo.xml +0 -242
  149. data/test/files/so_datetime.csv +0 -7
  150. data/test/files/style.ods +0 -0
  151. data/test/files/style.xls +0 -0
  152. data/test/files/style.xlsx +0 -0
  153. data/test/files/style.xml +0 -154
  154. data/test/files/time-test.csv +0 -2
  155. data/test/files/time-test.ods +0 -0
  156. data/test/files/time-test.xls +0 -0
  157. data/test/files/time-test.xlsx +0 -0
  158. data/test/files/time-test.xml +0 -131
  159. data/test/files/type_excel.ods +0 -0
  160. data/test/files/type_excel.xlsx +0 -0
  161. data/test/files/type_excelx.ods +0 -0
  162. data/test/files/type_excelx.xls +0 -0
  163. data/test/files/type_openoffice.xls +0 -0
  164. data/test/files/type_openoffice.xlsx +0 -0
  165. data/test/files/whitespace.ods +0 -0
  166. data/test/files/whitespace.xls +0 -0
  167. data/test/files/whitespace.xlsx +0 -0
  168. data/test/files/whitespace.xml +0 -184
  169. data/test/rm_sub_test.rb +0 -12
  170. data/test/rm_test.rb +0 -7
  171. data/website/index.html +0 -385
  172. data/website/index.txt +0 -423
  173. data/website/javascripts/rounded_corners_lite.inc.js +0 -285
  174. data/website/stylesheets/screen.css +0 -130
  175. data/website/template.rhtml +0 -48
@@ -0,0 +1,200 @@
1
+ require 'roo/excelx/extractor'
2
+
3
+ module Roo
4
+ class Excelx
5
+ class SheetDoc < Excelx::Extractor
6
# Set up the extractor for one worksheet XML file.
#
# path is handed to the superclass initializer; every other argument is
# stored unchanged in an instance variable for use while building cells.
def initialize(path, relationships, styles, shared_strings, workbook, options = {})
  super(path)
  @relationships  = relationships
  @styles         = styles
  @shared_strings = shared_strings
  @workbook       = workbook
  @options        = options
end
14
+
15
# Return the extracted cells, running extract_cells on first use.
# The result is cached in @cells, so the relationships argument only
# matters on the call that actually performs the extraction.
def cells(relationships)
  return @cells if @cells

  @cells = extract_cells(relationships)
end
18
+
19
# Return the extracted hyperlinks, running extract_hyperlinks on first use.
# The result is cached in @hyperlinks, so the relationships argument only
# matters on the call that actually performs the extraction.
def hyperlinks(relationships)
  return @hyperlinks if @hyperlinks

  @hyperlinks = extract_hyperlinks(relationships)
end
22
+
23
# Dimensions of the sheet as reported by extract_dimensions.
# This is only an upper bound on the cells that may be parsed: the
# document can be sparse, so fewer cells may actually exist.
# The value is cached in @dimensions once it is truthy.
def dimensions
  return @dimensions if @dimensions

  @dimensions = extract_dimensions
end
29
+
30
# Yield each row xml element to the caller.
# The given block is forwarded untouched to Roo::Utils.each_element, which
# is asked for the 'row' elements of the file at @path.
def each_row_streaming(&block)
  element_name = 'row'
  Roo::Utils.each_element(@path, element_name, &block)
end
34
+
35
# Yield one Excelx::Cell per child of the given row xml.
#
# Returns an empty array without yielding when row_xml is nil or false.
# The hyperlink for each cell is looked up by the key derived from the
# cell's "r" attribute.
def each_cell(row_xml)
  return [] unless row_xml

  row_xml.children.each do |cell_element|
    key  = ::Roo::Utils.ref_to_key(cell_element['r'])
    link = hyperlinks(@relationships)[key]
    yield cell_from_xml(cell_element, link)
  end
end
44
+
45
+ private
46
+
47
# Build an Excelx::Cell from a single <c> element, e.g.
#
#   <c r="A5" s="2">
#     <v>22606</v>
#   </c>
#
# cell_xml  - the cell node; its "r", "s" and "t" attributes and its
#             "is", "f" and "v" children are inspected.
# hyperlink - passed through unchanged to the resulting cell.
#
# Returns a cell built from the first "is" child that carries text or the
# first "v" child, whichever comes first. When neither is present the cell
# is created with nil for everything except its coordinate.
def cell_from_xml(cell_xml, hyperlink)
  # NOTE: this is error prone. to_i silently turns a missing "s" attribute
  # into 0; the original author noted it only works because format 0 is
  # the general format.
  style = cell_xml['s'].to_i

  # Only looked up when the "t" attribute names no explicit type; stays nil
  # otherwise. Deliberately not called `format`, to avoid shadowing Kernel#format.
  number_format = nil
  value_type =
    case cell_xml['t']
    when 's'         then :shared
    when 'b'         then :boolean
    when 'str'       then :string
    when 'inlineStr' then :inlinestr
    else
      number_format = @styles.style_format(style)
      Excelx::Format.to_type(number_format)
    end

  formula = nil
  row, column = ::Roo::Utils.split_coordinate(cell_xml['r'])
  coordinate = Excelx::Cell::Coordinate.new(row, column)

  cell_xml.children.each do |child|
    case child.name
    when 'is'
      text = inline_string_text(child)
      if text
        return Excelx::Cell.new(text, :string, formula, :string, inline_string_text(child), style, hyperlink, @workbook.base_date, coordinate)
      end
    when 'f'
      # Remembered so that a later <v> sibling is returned together with it.
      formula = child.content
    when 'v'
      raw = child.content

      # A time/datetime-formatted serial of at least 1.0 is reclassified:
      # :datetime when it has a fractional part above 0.000001, else :date.
      if [:time, :datetime].include?(value_type)
        serial = raw.to_f
        if serial >= 1.0
          value_type = (serial - serial.floor).abs > 0.000001 ? :datetime : :date
        end
      end

      excelx_type = [:numeric_or_formula, number_format.to_s]
      value =
        case value_type
        when :shared
          # The <v> content is an index into the shared strings.
          value_type = :string
          excelx_type = :string
          @shared_strings[raw.to_i]
        when :boolean
          raw.to_i == 1 ? 'TRUE' : 'FALSE'
        when :date, :time, :datetime
          raw
        when :formula
          raw.to_f
        when :string
          excelx_type = :string
          raw
        else
          # Everything else is reported as a float; the raw text is kept.
          value_type = :float
          raw
        end

      return Excelx::Cell.new(value, value_type, formula, excelx_type, child.content, style, hyperlink, @workbook.base_date, coordinate)
    end
  end

  Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, coordinate)
end

# Collect the text of an inline string (<is>) node.
# Handles both the plain form (<is><t>..</t></is>) and rich-text runs
# (<is><r><t>..</t></r>...</is>), concatenating all pieces in order.
# Returns nil when the node carries no <t> text at all.
def inline_string_text(is_xml)
  pieces = []
  is_xml.children.each do |part|
    case part.name
    when 't'
      pieces << part.content
    when 'r'
      part.children.each do |run_part|
        pieces << run_part.content if run_part.name == 't'
      end
    end
  end
  pieces.empty? ? nil : pieces.join
end
114
+
115
# Build a hash from cell key to hyperlink target.
#
# Every /worksheet/hyperlinks/hyperlink node that has an "id" attribute
# with a matching entry in relationships contributes one entry; all other
# hyperlink nodes are ignored.
def extract_hyperlinks(relationships)
  doc.xpath('/worksheet/hyperlinks/hyperlink').each_with_object({}) do |hyperlink, links|
    id_attribute = hyperlink.attribute('id')
    next unless id_attribute

    relationship = relationships[hyperlink.attribute('id').text]
    next unless relationship

    key = ::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s)
    links[key] = relationship.attribute('Target').text
  end
end
122
+
123
# Copy the value of each merged range's top-left cell into every other
# cell of that range.
#
# cells - hash keyed by [row, col]; it is modified in place.
#
# Merged ranges are read from /worksheet/mergeCells/mergeCell "ref"
# attributes ("A1:B2"). A ref without a colon is treated as a one-cell
# range and therefore contributes nothing.
def expand_merged_ranges(cells)
  # Map every non-top-left cell of a range to the key of its top-left cell.
  merges = {}
  doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
    tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
    br ||= tl
    (tl[0]..br[0]).each do |row|
      (tl[1]..br[1]).each do |col|
        next if row == tl[0] && col == tl[1]
        merges[[row, col]] = tl
      end
    end
  end

  # Duplicate the source value into all cells of the merged range.
  merges.each { |dst, src| cells[dst] = cells[src] }
end
140
+
141
# Build a hash from cell key to cell for every /worksheet/sheetData/row/c
# node of the document.
#
# When @options[:expand_merged_ranges] is truthy the hash is additionally
# passed through expand_merged_ranges before being returned.
def extract_cells(relationships)
  extracted_cells = doc.xpath('/worksheet/sheetData/row/c').each_with_object({}) do |cell_xml, found|
    key = ::Roo::Utils.ref_to_key(cell_xml['r'])
    found[key] = cell_from_xml(cell_xml, hyperlinks(relationships)[key])
  end

  if @options[:expand_merged_ranges]
    expand_merged_ranges(extracted_cells)
  end

  extracted_cells
end
151
+
152
+ def extract_dimensions
153
+ Roo::Utils.each_element(@path, 'dimension') do |dimension|
154
+ return dimension.attributes['ref'].value
155
+ end
156
+ end
157
+
158
+ =begin
159
+ File xl/comments1.xml
160
+ <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
161
+ <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
162
+ <authors>
163
+ <author />
164
+ </authors>
165
+ <commentList>
166
+ <comment ref="B4" authorId="0">
167
+ <text>
168
+ <r>
169
+ <rPr>
170
+ <sz val="10" />
171
+ <rFont val="Arial" />
172
+ <family val="2" />
173
+ </rPr>
174
+ <t>Kommentar fuer B4</t>
175
+ </r>
176
+ </text>
177
+ </comment>
178
+ <comment ref="B5" authorId="0">
179
+ <text>
180
+ <r>
181
+ <rPr>
182
+ <sz val="10" />
183
+ <rFont val="Arial" />
184
+ <family val="2" />
185
+ </rPr>
186
+ <t>Kommentar fuer B5</t>
187
+ </r>
188
+ </text>
189
+ </comment>
190
+ </commentList>
191
+ </comments>
192
+ =end
193
+ =begin
194
+ if @comments_doc[self.sheets.index(sheet)]
195
+ read_comments(sheet)
196
+ end
197
+ =end
198
+ end
199
+ end
200
+ end
@@ -0,0 +1,64 @@
1
+ require 'roo/font'
2
+ require 'roo/excelx/extractor'
3
+
4
+ module Roo
5
+ class Excelx
6
+ class Styles < Excelx::Extractor
7
# Convert an internal excelx style attribute to a format.
#
# The style is used (via to_i) as an index into num_fmt_ids. A format
# found under that id in num_fmts wins; otherwise the id is used as an
# integer index into Excelx::Format::STANDARD_FORMATS.
def style_format(style)
  format_id = num_fmt_ids[style.to_i]
  custom_format = num_fmts[format_id]
  return custom_format if custom_format

  Excelx::Format::STANDARD_FORMATS[format_id.to_i]
end
12
+
13
# Result of extract_definitions, cached in @definitions after first use.
def definitions
  return @definitions if @definitions

  @definitions = extract_definitions
end
16
+
17
+ private
18
+
19
# Result of extract_num_fmt_ids, cached in @num_fmt_ids after first use.
def num_fmt_ids
  return @num_fmt_ids if @num_fmt_ids

  @num_fmt_ids = extract_num_fmt_ids
end
22
+
23
# Result of extract_num_fmts, cached in @num_fmts after first use.
def num_fmts
  return @num_fmts if @num_fmts

  @num_fmts = extract_num_fmts
end
26
+
27
# Result of extract_fonts, cached in @fonts after first use.
def fonts
  return @fonts if @fonts

  @fonts = extract_fonts
end
30
+
31
# Return one font per <xf> element under <cellXfs>, in document order.
# Each font is looked up in `fonts` by the element's "fontId" attribute
# (a missing attribute becomes index 0 through to_i).
#
# Only <xf> elements are selected. Iterating over all child nodes would
# also visit whitespace text nodes in pretty-printed documents and shift
# the positions that style indexes refer to.
def extract_definitions
  doc.xpath('//cellXfs/xf').map do |xf|
    fonts[xf['fontId'].to_i]
  end
end
38
+
39
# Build one Font per //fonts/font element.
# bold, italic and underline are set to true exactly when the element has
# at least one direct <b>, <i> or <u> child respectively.
def extract_fonts
  doc.xpath('//fonts/font').map do |font_el|
    font = Font.new
    font.bold      = !font_el.xpath('./b').empty?
    font.italic    = !font_el.xpath('./i').empty?
    font.underline = !font_el.xpath('./u').empty?
    font
  end
end
48
+
49
# Return the "numFmtId" attribute of every <xf> element under <cellXfs>,
# in document order. An element without the attribute contributes nil so
# that positions keep matching the style indexes used by cells.
#
# Only <xf> elements are selected. Iterating over all child nodes would
# also visit whitespace text nodes in pretty-printed documents and shift
# those positions.
def extract_num_fmt_ids
  doc.xpath('//cellXfs/xf').map do |xf|
    xf['numFmtId']
  end
end
56
+
57
# Build a hash from "numFmtId" to "formatCode" covering every //numFmt
# element of the document. Keys and values are the raw attribute values.
def extract_num_fmts
  doc.xpath('//numFmt').each_with_object({}) do |num_fmt, formats|
    formats[num_fmt['numFmtId']] = num_fmt['formatCode']
  end
end
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,59 @@
1
+ require 'roo/excelx/extractor'
2
+
3
+ module Roo
4
+ class Excelx
5
+ class Workbook < Excelx::Extractor
6
# A named cell (label) of the workbook.
# row is stored as an Integer (row.to_i); col is stored as whatever
# ::Roo::Utils.letter_to_number returns for the given column.
class Label
  attr_reader :sheet, :row, :col, :name

  def initialize(name, sheet, row, col)
    @name, @sheet = name, sheet
    @row = row.to_i
    @col = ::Roo::Utils.letter_to_number(col)
  end

  # The label's position as a [row, col] pair.
  def key
    [@row, @col]
  end
end
20
+
21
# Initialize via the superclass, then insist that the workbook document
# exists.
#
# Raises ArgumentError when doc_exists? is false.
def initialize(path)
  super
  return if doc_exists?

  raise ArgumentError, 'missing required workbook file'
end
25
+
26
# All nodes of the workbook document matching the XPath '//sheet'.
def sheets
  sheet_nodes = doc.xpath('//sheet')
  sheet_nodes
end
29
+
30
# Defined names of the workbook, aka labels.
#
# Returns a hash from name to Label, built from every //definedName
# element whose text has the form "Sheet1!$C$5".
#
# Defined names whose text contains no "!$" cell reference (for example
# "#REF!" or a constant) are skipped, so that a single unusable entry does
# not make every label lookup fail with a NoMethodError.
def defined_names
  doc.xpath('//definedName').each_with_object({}) do |defined_name, labels|
    # "Sheet1!$C$5" => sheet "Sheet1", coordinates "C$5"
    sheet, coordinates = defined_name.text.split('!$', 2)
    next unless coordinates

    col, row = coordinates.split('$')
    name = defined_name['name']
    labels[name] = Label.new(name, sheet, row, col)
  end
end
40
+
41
# Base date of the workbook, cached in @base_date.
#
# Defaults to 1899-12-30 (1900 minus one day due to an excel quirk) but is
# 1904-01-01 when a workbookPr element carries a date1904 attribute whose
# value matches /true|1/i.
# http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
def base_date
  return @base_date if @base_date

  uses_1904 = doc.css('workbookPr[date1904]').any? do |workbook_pr|
    workbook_pr['date1904'] =~ /true|1/i
  end

  @base_date = uses_1904 ? Date.new(1904, 1, 1) : Date.new(1899, 12, 30)
end
57
+ end
58
+ end
59
+ end