roo 1.13.2 → 2.0.0beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171):
  1. checksums.yaml +4 -4
  2. data/.gitignore +7 -0
  3. data/.simplecov +4 -0
  4. data/.travis.yml +13 -0
  5. data/CHANGELOG +21 -0
  6. data/Gemfile +16 -10
  7. data/Guardfile +24 -0
  8. data/LICENSE +3 -1
  9. data/README.md +254 -0
  10. data/Rakefile +23 -23
  11. data/examples/roo_soap_client.rb +28 -31
  12. data/examples/roo_soap_server.rb +4 -6
  13. data/examples/write_me.rb +9 -10
  14. data/lib/roo.rb +18 -24
  15. data/lib/roo/base.rb +303 -388
  16. data/lib/roo/csv.rb +120 -113
  17. data/lib/roo/excelx.rb +452 -484
  18. data/lib/roo/excelx/comments.rb +24 -0
  19. data/lib/roo/excelx/extractor.rb +20 -0
  20. data/lib/roo/excelx/relationships.rb +26 -0
  21. data/lib/roo/excelx/shared_strings.rb +40 -0
  22. data/lib/roo/excelx/sheet_doc.rb +202 -0
  23. data/lib/roo/excelx/styles.rb +62 -0
  24. data/lib/roo/excelx/workbook.rb +59 -0
  25. data/lib/roo/font.rb +17 -0
  26. data/lib/roo/libre_office.rb +5 -0
  27. data/lib/roo/link.rb +15 -0
  28. data/lib/roo/{openoffice.rb → open_office.rb} +678 -496
  29. data/lib/roo/spreadsheet.rb +20 -23
  30. data/lib/roo/utils.rb +78 -0
  31. data/lib/roo/version.rb +3 -0
  32. data/roo.gemspec +20 -204
  33. data/spec/lib/roo/base_spec.rb +1 -4
  34. data/spec/lib/roo/csv_spec.rb +21 -13
  35. data/spec/lib/roo/excelx/format_spec.rb +7 -6
  36. data/spec/lib/roo/excelx_spec.rb +388 -11
  37. data/spec/lib/roo/libreoffice_spec.rb +16 -6
  38. data/spec/lib/roo/openoffice_spec.rb +2 -8
  39. data/spec/lib/roo/spreadsheet_spec.rb +40 -12
  40. data/spec/lib/roo/utils_spec.rb +106 -0
  41. data/spec/spec_helper.rb +2 -1
  42. data/test/test_generic_spreadsheet.rb +19 -67
  43. data/test/test_helper.rb +9 -56
  44. data/test/test_roo.rb +252 -477
  45. metadata +63 -302
  46. data/Gemfile.lock +0 -78
  47. data/README.markdown +0 -126
  48. data/VERSION +0 -1
  49. data/lib/roo/excel.rb +0 -355
  50. data/lib/roo/excel2003xml.rb +0 -300
  51. data/lib/roo/google.rb +0 -292
  52. data/lib/roo/roo_rails_helper.rb +0 -83
  53. data/lib/roo/worksheet.rb +0 -18
  54. data/spec/lib/roo/excel2003xml_spec.rb +0 -15
  55. data/spec/lib/roo/excel_spec.rb +0 -17
  56. data/spec/lib/roo/google_spec.rb +0 -64
  57. data/test/files/1900_base.xls +0 -0
  58. data/test/files/1900_base.xlsx +0 -0
  59. data/test/files/1904_base.xls +0 -0
  60. data/test/files/1904_base.xlsx +0 -0
  61. data/test/files/Bibelbund.csv +0 -3741
  62. data/test/files/Bibelbund.ods +0 -0
  63. data/test/files/Bibelbund.xls +0 -0
  64. data/test/files/Bibelbund.xlsx +0 -0
  65. data/test/files/Bibelbund.xml +0 -62518
  66. data/test/files/Bibelbund1.ods +0 -0
  67. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  68. data/test/files/bad_excel_date.xls +0 -0
  69. data/test/files/bbu.ods +0 -0
  70. data/test/files/bbu.xls +0 -0
  71. data/test/files/bbu.xlsx +0 -0
  72. data/test/files/bbu.xml +0 -152
  73. data/test/files/bode-v1.ods.zip +0 -0
  74. data/test/files/bode-v1.xls.zip +0 -0
  75. data/test/files/boolean.csv +0 -2
  76. data/test/files/boolean.ods +0 -0
  77. data/test/files/boolean.xls +0 -0
  78. data/test/files/boolean.xlsx +0 -0
  79. data/test/files/boolean.xml +0 -112
  80. data/test/files/borders.ods +0 -0
  81. data/test/files/borders.xls +0 -0
  82. data/test/files/borders.xlsx +0 -0
  83. data/test/files/borders.xml +0 -144
  84. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  85. data/test/files/bug-row-column-fixnum-float.xls +0 -0
  86. data/test/files/bug-row-column-fixnum-float.xml +0 -127
  87. data/test/files/comments.ods +0 -0
  88. data/test/files/comments.xls +0 -0
  89. data/test/files/comments.xlsx +0 -0
  90. data/test/files/csvtypes.csv +0 -1
  91. data/test/files/datetime.ods +0 -0
  92. data/test/files/datetime.xls +0 -0
  93. data/test/files/datetime.xlsx +0 -0
  94. data/test/files/datetime.xml +0 -142
  95. data/test/files/datetime_floatconv.xls +0 -0
  96. data/test/files/datetime_floatconv.xml +0 -148
  97. data/test/files/dreimalvier.ods +0 -0
  98. data/test/files/emptysheets.ods +0 -0
  99. data/test/files/emptysheets.xls +0 -0
  100. data/test/files/emptysheets.xlsx +0 -0
  101. data/test/files/emptysheets.xml +0 -105
  102. data/test/files/excel2003.xml +0 -21140
  103. data/test/files/false_encoding.xls +0 -0
  104. data/test/files/false_encoding.xml +0 -132
  105. data/test/files/file_item_error.xlsx +0 -0
  106. data/test/files/formula.ods +0 -0
  107. data/test/files/formula.xls +0 -0
  108. data/test/files/formula.xlsx +0 -0
  109. data/test/files/formula.xml +0 -134
  110. data/test/files/formula_parse_error.xls +0 -0
  111. data/test/files/formula_parse_error.xml +0 -1833
  112. data/test/files/formula_string_error.xlsx +0 -0
  113. data/test/files/html-escape.ods +0 -0
  114. data/test/files/link.xls +0 -0
  115. data/test/files/link.xlsx +0 -0
  116. data/test/files/matrix.ods +0 -0
  117. data/test/files/matrix.xls +0 -0
  118. data/test/files/named_cells.ods +0 -0
  119. data/test/files/named_cells.xls +0 -0
  120. data/test/files/named_cells.xlsx +0 -0
  121. data/test/files/no_spreadsheet_file.txt +0 -1
  122. data/test/files/numbers1.csv +0 -18
  123. data/test/files/numbers1.ods +0 -0
  124. data/test/files/numbers1.xls +0 -0
  125. data/test/files/numbers1.xlsx +0 -0
  126. data/test/files/numbers1.xml +0 -312
  127. data/test/files/numeric-link.xlsx +0 -0
  128. data/test/files/only_one_sheet.ods +0 -0
  129. data/test/files/only_one_sheet.xls +0 -0
  130. data/test/files/only_one_sheet.xlsx +0 -0
  131. data/test/files/only_one_sheet.xml +0 -67
  132. data/test/files/paragraph.ods +0 -0
  133. data/test/files/paragraph.xls +0 -0
  134. data/test/files/paragraph.xlsx +0 -0
  135. data/test/files/paragraph.xml +0 -127
  136. data/test/files/prova.xls +0 -0
  137. data/test/files/ric.ods +0 -0
  138. data/test/files/simple_spreadsheet.ods +0 -0
  139. data/test/files/simple_spreadsheet.xls +0 -0
  140. data/test/files/simple_spreadsheet.xlsx +0 -0
  141. data/test/files/simple_spreadsheet.xml +0 -225
  142. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  143. data/test/files/simple_spreadsheet_from_italo.xls +0 -0
  144. data/test/files/simple_spreadsheet_from_italo.xml +0 -242
  145. data/test/files/so_datetime.csv +0 -7
  146. data/test/files/style.ods +0 -0
  147. data/test/files/style.xls +0 -0
  148. data/test/files/style.xlsx +0 -0
  149. data/test/files/style.xml +0 -154
  150. data/test/files/time-test.csv +0 -2
  151. data/test/files/time-test.ods +0 -0
  152. data/test/files/time-test.xls +0 -0
  153. data/test/files/time-test.xlsx +0 -0
  154. data/test/files/time-test.xml +0 -131
  155. data/test/files/type_excel.ods +0 -0
  156. data/test/files/type_excel.xlsx +0 -0
  157. data/test/files/type_excelx.ods +0 -0
  158. data/test/files/type_excelx.xls +0 -0
  159. data/test/files/type_openoffice.xls +0 -0
  160. data/test/files/type_openoffice.xlsx +0 -0
  161. data/test/files/whitespace.ods +0 -0
  162. data/test/files/whitespace.xls +0 -0
  163. data/test/files/whitespace.xlsx +0 -0
  164. data/test/files/whitespace.xml +0 -184
  165. data/test/rm_sub_test.rb +0 -12
  166. data/test/rm_test.rb +0 -7
  167. data/website/index.html +0 -385
  168. data/website/index.txt +0 -423
  169. data/website/javascripts/rounded_corners_lite.inc.js +0 -285
  170. data/website/stylesheets/screen.css +0 -130
  171. data/website/template.rhtml +0 -48
@@ -0,0 +1,24 @@
require 'roo/excelx/extractor'

module Roo
  # Reads cell comments out of an xlsx comments XML part.
  class Excelx::Comments < Excelx::Extractor

    # Returns a Hash of comment texts, computed once and then cached.
    # Keys are whatever ::Roo::Utils.ref_to_key produces for each comment's
    # "ref" attribute; values are the text of the comment.
    def comments
      @comments ||= extract_comments
    end

    private

    # Builds the comments Hash from the document; an absent file yields {}.
    def extract_comments
      return {} unless doc_exists?

      pairs = doc.xpath("//comments/commentList/comment").map do |comment|
        # Prefer the text of a rich-text run, fall back to a plain <t> node.
        text_node = comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')
        value = text_node.text
        [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
      end
      Hash[pairs]
    end

  end
end
@@ -0,0 +1,20 @@
module Roo
  # Common base for the Excelx part readers: remembers the path of one XML
  # file and lazily loads it on demand.
  class Excelx::Extractor
    # path - location of the XML file to read (may be nil).
    def initialize(path)
      @path = path
    end

    private

    # The parsed document with XML namespaces removed, or nil when the file
    # is not available. A successfully loaded document is cached.
    def doc
      @doc ||= (::Roo::Utils.load_xml(@path).remove_namespaces! if doc_exists?)
    end

    # Truthy only when a path was given and a file exists at that path.
    def doc_exists?
      @path && File.exist?(@path)
    end
  end
end
@@ -0,0 +1,26 @@
require 'roo/excelx/extractor'

module Roo
  # Reads the <Relationship> entries of an xlsx relationships part and
  # exposes them by relationship id.
  class Excelx::Relationships < Excelx::Extractor
    # Looks up a relationship element by its "Id" attribute value.
    # Returns nil when no such relationship is known.
    def [](index)
      to_a[index]
    end

    # Returns the memoized id => relationship element mapping.
    # (The name is historical; the value is a Hash.)
    def to_a
      @relationships ||= extract_relationships
    end

    private

    # Builds the id => element Hash from the document.
    #
    # An absent relationships file yields an empty Hash (not an Array), so
    # that #[] can be called with a String id without raising a TypeError.
    def extract_relationships
      return {} unless doc_exists?

      Hash[doc.xpath("/Relationships/Relationship").map do |rel|
        [rel.attribute('Id').text, rel]
      end]
    end

  end
end
@@ -0,0 +1,40 @@
require 'roo/excelx/extractor'

module Roo
  # Reads the shared strings table of an xlsx package.
  class Excelx::SharedStrings < Excelx::Extractor
    # Returns the shared string stored at the given position.
    def [](index)
      to_a[index]
    end

    # Returns all shared strings as an Array, computed once and then cached.
    def to_a
      @array ||= extract_shared_strings
    end

    private

    # Produces one String per /sst/si entry; an absent file yields [].
    def extract_shared_strings
      return [] unless doc_exists?

      doc.xpath("/sst/si").map do |si|
        si.children.inject('') do |text, node|
          case node.name
          when 'r'
            # Rich-text run: append the content of each <t> child.
            node.children.each do |run_child|
              text << run_child.content if run_child.name == 't'
            end
            text
          when 't'
            # Plain text node: its content replaces what was gathered so far.
            node.content
          else
            text
          end
        end
      end
    end

  end
end
@@ -0,0 +1,202 @@
require 'roo/excelx/extractor'

module Roo
  # Reads one worksheet XML part of an xlsx package: cells, hyperlinks,
  # dimensions and (optionally) merged ranges.
  class Excelx::SheetDoc < Excelx::Extractor
    # path           - location of the worksheet XML file
    # relationships  - object answering #[] with a relationship id; used to
    #                  resolve hyperlink targets
    # styles         - object answering #style_format(style_index)
    # shared_strings - object answering #[] with an Integer index
    # workbook       - object answering #base_date
    # options        - Hash; only :expand_merged_ranges is read in this class
    def initialize(path, relationships, styles, shared_strings, workbook, options = {})
      super(path)
      @options = options
      @relationships = relationships
      @styles = styles
      @shared_strings = shared_strings
      @workbook = workbook
    end

    # Returns a Hash of key => Excelx::Cell for every cell in the sheet.
    # The result is memoized, so the argument only matters on the first call.
    def cells(relationships)
      @cells ||= extract_cells(relationships)
    end

    # Returns a Hash of key => hyperlink target for the sheet.
    # The result is memoized, so the argument only matters on the first call.
    def hyperlinks(relationships)
      @hyperlinks ||= extract_hyperlinks(relationships)
    end

    # Get the dimensions for the sheet.
    # This is the upper bound of cells that might
    # be parsed. (the document may be sparse so cell count is only upper bound)
    def dimensions
      @dimensions ||= extract_dimensions
    end

    # Yield each row xml element to caller
    def each_row_streaming(&block)
      Roo::Utils.each_element(@path, 'row', &block)
    end

    # Yield each cell as Excelx::Cell to caller for given
    # row xml
    def each_cell(row_xml)
      return [] unless row_xml
      row_xml.children.each do |cell_element|
        key = ::Roo::Utils.ref_to_key(cell_element['r'])
        yield cell_from_xml(cell_element, hyperlinks(@relationships)[key])
      end
    end

    private

    # Builds an Excelx::Cell from a <c> element.
    #
    # cell_xml  - the <c> element
    # hyperlink - the hyperlink target for this cell, or nil
    #
    # The first <is>/<t> or <v> child decides the cell's value; a preceding
    # <f> child supplies the formula. A cell with neither yields an empty
    # Excelx::Cell that only carries its coordinate.
    def cell_from_xml(cell_xml, hyperlink)
      # This is error prone, to_i will silently turn a nil into a 0
      # and it works by coincidence that Format[0] is general
      style = cell_xml['s'].to_i
      # Example element: <c r="A5" s="2"><v>22606</v></c>
      value_type =
        case cell_xml['t']
        when 's'
          :shared
        when 'b'
          :boolean
        when 'str'
          :string
        when 'inlineStr'
          :inlinestr
        else
          # No explicit type: derive it from the number format of the style.
          format = @styles.style_format(style)
          Excelx::Format.to_type(format)
        end
      formula = nil
      row, column = ::Roo::Utils.split_coordinate(cell_xml['r'])
      cell_xml.children.each do |cell|
        case cell.name
        when 'is'
          cell.children.each do |inline_str|
            if inline_str.name == 't'
              return Excelx::Cell.new(
                inline_str.content, :string, formula, :string, inline_str.content,
                style, hyperlink, @workbook.base_date,
                Excelx::Cell::Coordinate.new(row, column)
              )
            end
          end
        when 'f'
          formula = cell.content
        when 'v'
          # A time/datetime value of 1.0 or more carries a date part: treat it
          # as a datetime when it has a fractional part, otherwise as a date.
          if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0
            value_type =
              if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
                :datetime
              else
                :date
              end
          end
          # `format` is nil here unless the type was derived from the style.
          excelx_type = [:numeric_or_formula, format.to_s]
          value =
            case value_type
            when :shared
              value_type = :string
              excelx_type = :string
              @shared_strings[cell.content.to_i]
            when :boolean
              (cell.content.to_i == 1 ? 'TRUE' : 'FALSE')
            when :date, :time, :datetime
              cell.content
            when :formula
              cell.content.to_f
            when :string
              excelx_type = :string
              cell.content
            else
              value_type = :float
              cell.content
            end
          return Excelx::Cell.new(
            value, value_type, formula, excelx_type, cell.content,
            style, hyperlink, @workbook.base_date,
            Excelx::Cell::Coordinate.new(row, column)
          )
        end
      end
      Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, Excelx::Cell::Coordinate.new(row, column))
    end

    # Builds the key => target Hash from /worksheet/hyperlinks/hyperlink.
    # Hyperlinks without an "id" attribute, or whose id is not found in
    # +relationships+, are left out.
    def extract_hyperlinks(relationships)
      Hash[doc.xpath("/worksheet/hyperlinks/hyperlink").map do |hyperlink|
        if hyperlink.attribute('id') && relationship = relationships[hyperlink.attribute('id').text]
          [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
        end
      end.compact]
    end

    # Copies the top-left cell of every merged range into all other
    # positions of that range. Mutates +cells+ in place.
    def expand_merged_ranges(cells)
      # Extract merged ranges from xml
      merges = {}
      doc.xpath("/worksheet/mergeCells/mergeCell").each do |mergecell_xml|
        tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
        (tl[0]..br[0]).each do |row|
          (tl[1]..br[1]).each do |col|
            # The top-left cell is the source, not a destination.
            next if row == tl[0] && col == tl[1]
            merges[[row, col]] = tl
          end
        end
      end
      # Duplicate value into all cells in merged range
      merges.each do |dst, src|
        cells[dst] = cells[src]
      end
    end

    # Builds the key => Excelx::Cell Hash from /worksheet/sheetData/row/c,
    # expanding merged ranges when the :expand_merged_ranges option is set.
    def extract_cells(relationships)
      extracted_cells = Hash[doc.xpath("/worksheet/sheetData/row/c").map do |cell_xml|
        key = ::Roo::Utils.ref_to_key(cell_xml['r'])
        [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
      end]
      if @options[:expand_merged_ranges]
        expand_merged_ranges(extracted_cells)
      end
      extracted_cells
    end

    # Returns the "ref" attribute value of the first <dimension> element
    # yielded by Roo::Utils.each_element.
    # NOTE(review): when no <dimension> element is yielded, the return value
    # is whatever each_element itself returns - confirm against Roo::Utils.
    def extract_dimensions
      Roo::Utils.each_element(@path, 'dimension') do |dimension|
        return dimension.attributes["ref"].value
      end
    end

    # Sample of a comments part, for reference:
    #
    # File xl/comments1.xml
    #   <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
    #   <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
    #     <authors>
    #       <author />
    #     </authors>
    #     <commentList>
    #       <comment ref="B4" authorId="0">
    #         <text>
    #           <r>
    #             <rPr>
    #               <sz val="10" />
    #               <rFont val="Arial" />
    #               <family val="2" />
    #             </rPr>
    #             <t>Kommentar fuer B4</t>
    #           </r>
    #         </text>
    #       </comment>
    #       <comment ref="B5" authorId="0">
    #         <text>
    #           <r>
    #             <rPr>
    #               <sz val="10" />
    #               <rFont val="Arial" />
    #               <family val="2" />
    #             </rPr>
    #             <t>Kommentar fuer B5</t>
    #           </r>
    #         </text>
    #       </comment>
    #     </commentList>
    #   </comments>
  end
end
@@ -0,0 +1,62 @@
require 'roo/font'
require 'roo/excelx/extractor'

module Roo
  # Reads number formats and fonts out of an xlsx styles part.
  class Excelx::Styles < Excelx::Extractor
    # convert internal excelx attribute to a format
    #
    # style - the cell's style index (anything responding to #to_i)
    #
    # Returns the custom format code declared for the style's numFmtId, or
    # the entry of Excelx::Format::STANDARD_FORMATS for that id otherwise.
    def style_format(style)
      id = num_fmt_ids[style.to_i]
      num_fmts[id] || Excelx::Format::STANDARD_FORMATS[id.to_i]
    end

    # Returns, memoized, the Font for each <xf> under <cellXfs>, in
    # document order (i.e. indexed by style index).
    def definitions
      @definitions ||= extract_definitions
    end

    private

    def num_fmt_ids
      @num_fmt_ids ||= extract_num_fmt_ids
    end

    def num_fmts
      @num_fmts ||= extract_num_fmts
    end

    def fonts
      @fonts ||= extract_fonts
    end

    # Maps every <xf> element under <cellXfs> to its font.
    # Only element children are visited: whitespace text nodes (present when
    # the XML is pretty-printed) would otherwise shift the style indexes.
    def extract_definitions
      doc.xpath("//cellXfs").flat_map do |xfs|
        xfs.element_children.map do |xf|
          fonts[xf['fontId'].to_i]
        end
      end
    end

    # Builds one Font per <font> element; a flag is set when the matching
    # child element (<b>, <i>, <u>) is present.
    def extract_fonts
      doc.xpath("//fonts/font").map do |font_el|
        Font.new.tap do |font|
          font.bold = !font_el.xpath('./b').empty?
          font.italic = !font_el.xpath('./i').empty?
          font.underline = !font_el.xpath('./u').empty?
        end
      end
    end

    # Collects the numFmtId attribute of every <xf> element under <cellXfs>,
    # in document order. Only element children are visited so that the
    # position in the result equals the style index.
    def extract_num_fmt_ids
      doc.xpath("//cellXfs").flat_map do |xfs|
        xfs.element_children.map do |xf|
          xf['numFmtId']
        end
      end
    end

    # Builds the numFmtId => formatCode Hash from all <numFmt> elements.
    def extract_num_fmts
      Hash[doc.xpath("//numFmt").map do |num_fmt|
        [num_fmt['numFmtId'], num_fmt['formatCode']]
      end]
    end
  end
end
@@ -0,0 +1,59 @@
require 'roo/excelx/extractor'

module Roo
  # Reads the workbook part of an xlsx package: sheets, defined names and
  # the date system in use.
  class Excelx::Workbook < Excelx::Extractor
    # A defined name ("label") pointing at a single cell.
    class Label
      attr_reader :sheet, :row, :col, :name

      # name  - the defined name
      # sheet - the sheet part of the reference
      # row   - the row part of the reference (converted with #to_i)
      # col   - the column letters (converted by ::Roo::Utils.letter_to_number)
      def initialize(name, sheet, row, col)
        @name = name
        @sheet = sheet
        @row = row.to_i
        @col = ::Roo::Utils.letter_to_number(col)
      end

      # Returns the [row, col] pair of the referenced cell.
      def key
        [@row, @col]
      end
    end

    # Raises ArgumentError when no file exists at +path+.
    def initialize(path)
      super
      if !doc_exists?
        raise ArgumentError, 'missing required workbook file'
      end
    end

    # Returns all <sheet> elements of the workbook document.
    def sheets
      doc.xpath("//sheet")
    end

    # aka labels
    #
    # Returns a Hash of name => Label for the <definedName> elements whose
    # text contains a "!$" separator, e.g. "Sheet1!$C$5". Entries without
    # that separator carry no coordinates to split and are skipped instead
    # of raising NoMethodError on nil.
    def defined_names
      doc.xpath("//definedName").each_with_object({}) do |defined_name, labels|
        # "Sheet1!$C$5"
        sheet, coordinates = defined_name.text.split('!$', 2)
        next unless coordinates

        col, row = coordinates.split('$')
        name = defined_name['name']
        labels[name] = Label.new(name, sheet, row, col)
      end
    end

    # Returns, memoized, the Date from which serial date values are counted.
    def base_date
      @base_date ||=
        begin
          # Default to 1900 (minus one day due to excel quirk) but use 1904 if
          # it's set in the Workbook's workbookPr
          # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
          result = Date.new(1899, 12, 30) # default
          doc.css("workbookPr[date1904]").each do |workbook_pr|
            if workbook_pr["date1904"] =~ /true|1/i
              result = Date.new(1904, 1, 1)
              break
            end
          end
          result
        end
    end
  end
end
@@ -0,0 +1,17 @@
module Roo
  # Holds the bold / italic / underline flags of a font.
  class Font
    attr_accessor :bold, :italic, :underline

    # Returns true when the bold attribute is truthy, false otherwise
    # (including when it was never assigned).
    def bold?
      @bold ? true : false
    end

    # Returns true when the italic attribute is truthy, false otherwise
    # (including when it was never assigned).
    def italic?
      @italic ? true : false
    end

    # Returns true when the underline attribute is truthy, false otherwise
    # (including when it was never assigned).
    def underline?
      @underline ? true : false
    end
  end
end