roo 1.13.2 → 2.0.0beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (171) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +7 -0
  3. data/.simplecov +4 -0
  4. data/.travis.yml +13 -0
  5. data/CHANGELOG +21 -0
  6. data/Gemfile +16 -10
  7. data/Guardfile +24 -0
  8. data/LICENSE +3 -1
  9. data/README.md +254 -0
  10. data/Rakefile +23 -23
  11. data/examples/roo_soap_client.rb +28 -31
  12. data/examples/roo_soap_server.rb +4 -6
  13. data/examples/write_me.rb +9 -10
  14. data/lib/roo.rb +18 -24
  15. data/lib/roo/base.rb +303 -388
  16. data/lib/roo/csv.rb +120 -113
  17. data/lib/roo/excelx.rb +452 -484
  18. data/lib/roo/excelx/comments.rb +24 -0
  19. data/lib/roo/excelx/extractor.rb +20 -0
  20. data/lib/roo/excelx/relationships.rb +26 -0
  21. data/lib/roo/excelx/shared_strings.rb +40 -0
  22. data/lib/roo/excelx/sheet_doc.rb +202 -0
  23. data/lib/roo/excelx/styles.rb +62 -0
  24. data/lib/roo/excelx/workbook.rb +59 -0
  25. data/lib/roo/font.rb +17 -0
  26. data/lib/roo/libre_office.rb +5 -0
  27. data/lib/roo/link.rb +15 -0
  28. data/lib/roo/{openoffice.rb → open_office.rb} +678 -496
  29. data/lib/roo/spreadsheet.rb +20 -23
  30. data/lib/roo/utils.rb +78 -0
  31. data/lib/roo/version.rb +3 -0
  32. data/roo.gemspec +20 -204
  33. data/spec/lib/roo/base_spec.rb +1 -4
  34. data/spec/lib/roo/csv_spec.rb +21 -13
  35. data/spec/lib/roo/excelx/format_spec.rb +7 -6
  36. data/spec/lib/roo/excelx_spec.rb +388 -11
  37. data/spec/lib/roo/libreoffice_spec.rb +16 -6
  38. data/spec/lib/roo/openoffice_spec.rb +2 -8
  39. data/spec/lib/roo/spreadsheet_spec.rb +40 -12
  40. data/spec/lib/roo/utils_spec.rb +106 -0
  41. data/spec/spec_helper.rb +2 -1
  42. data/test/test_generic_spreadsheet.rb +19 -67
  43. data/test/test_helper.rb +9 -56
  44. data/test/test_roo.rb +252 -477
  45. metadata +63 -302
  46. data/Gemfile.lock +0 -78
  47. data/README.markdown +0 -126
  48. data/VERSION +0 -1
  49. data/lib/roo/excel.rb +0 -355
  50. data/lib/roo/excel2003xml.rb +0 -300
  51. data/lib/roo/google.rb +0 -292
  52. data/lib/roo/roo_rails_helper.rb +0 -83
  53. data/lib/roo/worksheet.rb +0 -18
  54. data/spec/lib/roo/excel2003xml_spec.rb +0 -15
  55. data/spec/lib/roo/excel_spec.rb +0 -17
  56. data/spec/lib/roo/google_spec.rb +0 -64
  57. data/test/files/1900_base.xls +0 -0
  58. data/test/files/1900_base.xlsx +0 -0
  59. data/test/files/1904_base.xls +0 -0
  60. data/test/files/1904_base.xlsx +0 -0
  61. data/test/files/Bibelbund.csv +0 -3741
  62. data/test/files/Bibelbund.ods +0 -0
  63. data/test/files/Bibelbund.xls +0 -0
  64. data/test/files/Bibelbund.xlsx +0 -0
  65. data/test/files/Bibelbund.xml +0 -62518
  66. data/test/files/Bibelbund1.ods +0 -0
  67. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  68. data/test/files/bad_excel_date.xls +0 -0
  69. data/test/files/bbu.ods +0 -0
  70. data/test/files/bbu.xls +0 -0
  71. data/test/files/bbu.xlsx +0 -0
  72. data/test/files/bbu.xml +0 -152
  73. data/test/files/bode-v1.ods.zip +0 -0
  74. data/test/files/bode-v1.xls.zip +0 -0
  75. data/test/files/boolean.csv +0 -2
  76. data/test/files/boolean.ods +0 -0
  77. data/test/files/boolean.xls +0 -0
  78. data/test/files/boolean.xlsx +0 -0
  79. data/test/files/boolean.xml +0 -112
  80. data/test/files/borders.ods +0 -0
  81. data/test/files/borders.xls +0 -0
  82. data/test/files/borders.xlsx +0 -0
  83. data/test/files/borders.xml +0 -144
  84. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  85. data/test/files/bug-row-column-fixnum-float.xls +0 -0
  86. data/test/files/bug-row-column-fixnum-float.xml +0 -127
  87. data/test/files/comments.ods +0 -0
  88. data/test/files/comments.xls +0 -0
  89. data/test/files/comments.xlsx +0 -0
  90. data/test/files/csvtypes.csv +0 -1
  91. data/test/files/datetime.ods +0 -0
  92. data/test/files/datetime.xls +0 -0
  93. data/test/files/datetime.xlsx +0 -0
  94. data/test/files/datetime.xml +0 -142
  95. data/test/files/datetime_floatconv.xls +0 -0
  96. data/test/files/datetime_floatconv.xml +0 -148
  97. data/test/files/dreimalvier.ods +0 -0
  98. data/test/files/emptysheets.ods +0 -0
  99. data/test/files/emptysheets.xls +0 -0
  100. data/test/files/emptysheets.xlsx +0 -0
  101. data/test/files/emptysheets.xml +0 -105
  102. data/test/files/excel2003.xml +0 -21140
  103. data/test/files/false_encoding.xls +0 -0
  104. data/test/files/false_encoding.xml +0 -132
  105. data/test/files/file_item_error.xlsx +0 -0
  106. data/test/files/formula.ods +0 -0
  107. data/test/files/formula.xls +0 -0
  108. data/test/files/formula.xlsx +0 -0
  109. data/test/files/formula.xml +0 -134
  110. data/test/files/formula_parse_error.xls +0 -0
  111. data/test/files/formula_parse_error.xml +0 -1833
  112. data/test/files/formula_string_error.xlsx +0 -0
  113. data/test/files/html-escape.ods +0 -0
  114. data/test/files/link.xls +0 -0
  115. data/test/files/link.xlsx +0 -0
  116. data/test/files/matrix.ods +0 -0
  117. data/test/files/matrix.xls +0 -0
  118. data/test/files/named_cells.ods +0 -0
  119. data/test/files/named_cells.xls +0 -0
  120. data/test/files/named_cells.xlsx +0 -0
  121. data/test/files/no_spreadsheet_file.txt +0 -1
  122. data/test/files/numbers1.csv +0 -18
  123. data/test/files/numbers1.ods +0 -0
  124. data/test/files/numbers1.xls +0 -0
  125. data/test/files/numbers1.xlsx +0 -0
  126. data/test/files/numbers1.xml +0 -312
  127. data/test/files/numeric-link.xlsx +0 -0
  128. data/test/files/only_one_sheet.ods +0 -0
  129. data/test/files/only_one_sheet.xls +0 -0
  130. data/test/files/only_one_sheet.xlsx +0 -0
  131. data/test/files/only_one_sheet.xml +0 -67
  132. data/test/files/paragraph.ods +0 -0
  133. data/test/files/paragraph.xls +0 -0
  134. data/test/files/paragraph.xlsx +0 -0
  135. data/test/files/paragraph.xml +0 -127
  136. data/test/files/prova.xls +0 -0
  137. data/test/files/ric.ods +0 -0
  138. data/test/files/simple_spreadsheet.ods +0 -0
  139. data/test/files/simple_spreadsheet.xls +0 -0
  140. data/test/files/simple_spreadsheet.xlsx +0 -0
  141. data/test/files/simple_spreadsheet.xml +0 -225
  142. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  143. data/test/files/simple_spreadsheet_from_italo.xls +0 -0
  144. data/test/files/simple_spreadsheet_from_italo.xml +0 -242
  145. data/test/files/so_datetime.csv +0 -7
  146. data/test/files/style.ods +0 -0
  147. data/test/files/style.xls +0 -0
  148. data/test/files/style.xlsx +0 -0
  149. data/test/files/style.xml +0 -154
  150. data/test/files/time-test.csv +0 -2
  151. data/test/files/time-test.ods +0 -0
  152. data/test/files/time-test.xls +0 -0
  153. data/test/files/time-test.xlsx +0 -0
  154. data/test/files/time-test.xml +0 -131
  155. data/test/files/type_excel.ods +0 -0
  156. data/test/files/type_excel.xlsx +0 -0
  157. data/test/files/type_excelx.ods +0 -0
  158. data/test/files/type_excelx.xls +0 -0
  159. data/test/files/type_openoffice.xls +0 -0
  160. data/test/files/type_openoffice.xlsx +0 -0
  161. data/test/files/whitespace.ods +0 -0
  162. data/test/files/whitespace.xls +0 -0
  163. data/test/files/whitespace.xlsx +0 -0
  164. data/test/files/whitespace.xml +0 -184
  165. data/test/rm_sub_test.rb +0 -12
  166. data/test/rm_test.rb +0 -7
  167. data/website/index.html +0 -385
  168. data/website/index.txt +0 -423
  169. data/website/javascripts/rounded_corners_lite.inc.js +0 -285
  170. data/website/stylesheets/screen.css +0 -130
  171. data/website/template.rhtml +0 -48
@@ -0,0 +1,24 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  # Reads the cell comments stored in a worksheet's comments XML part.
  class Excelx::Comments < Excelx::Extractor
    # Returns a Hash mapping each comment's cell key (as produced by
    # Roo::Utils.ref_to_key from the comment's `ref` attribute) to the
    # comment text. The result is memoized. An empty Hash is returned when
    # the comments file does not exist.
    def comments
      @comments ||= extract_comments
    end

    private

    # Builds the key => text Hash from every <comment> under
    # <comments>/<commentList>. The text is taken from the first
    # <text>/<r>/<t> node, falling back to <text>/<t>.
    def extract_comments
      return {} unless doc_exists?

      Hash[doc.xpath("//comments/commentList/comment").map do |comment|
        text_node = comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')
        # at_xpath yields nil when a comment carries no text node at all;
        # treat that as an empty comment instead of raising NoMethodError.
        value = text_node ? text_node.text : ''
        [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
      end]
    end
  end
end
@@ -0,0 +1,20 @@
1
module Roo
  # Common base for the xlsx part readers: remembers the path of one XML
  # file and lazily parses it on demand.
  class Excelx::Extractor
    # path - location of the XML part on disk (may be nil or missing).
    def initialize(path)
      @path = path
    end

    private

    # Parsed XML document with namespaces removed, memoized once loaded.
    # Returns nil when the file is not available.
    def doc
      return @doc if @doc

      @doc = ::Roo::Utils.load_xml(@path).remove_namespaces! if doc_exists?
    end

    # Truthy only when a path was given and a file exists at that path.
    def doc_exists?
      @path ? File.exist?(@path) : @path
    end
  end
end
@@ -0,0 +1,26 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  # Reads a relationships (.rels) XML part and exposes its <Relationship>
  # nodes by their Id attribute.
  class Excelx::Relationships < Excelx::Extractor
    # Looks up the <Relationship> node for the given Id; nil when unknown.
    def [](index)
      to_a[index]
    end

    # Returns the memoized Hash of Id => <Relationship> node.
    # (The name is historical; the collection is a Hash, not an Array.)
    def to_a
      @relationships ||= extract_relationships
    end

    private

    # Builds the Id => node Hash from /Relationships/Relationship.
    # When the file is missing an empty Hash is returned so that lookups by
    # string Id yield nil; an empty Array here would raise TypeError as soon
    # as it was indexed with a relationship Id.
    def extract_relationships
      return {} unless doc_exists?

      Hash[doc.xpath("/Relationships/Relationship").map do |rel|
        [rel.attribute('Id').text, rel]
      end]
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  # Reads the shared strings XML part and exposes the strings by index.
  class Excelx::SharedStrings < Excelx::Extractor
    # Returns the shared string stored at the given position.
    def [](index)
      to_a[index]
    end

    # Returns the memoized Array of all shared strings, in document order.
    def to_a
      @array ||= extract_shared_strings
    end

    private

    # Produces one string per /sst/si element. Text inside <r> runs is
    # appended piece by piece; a direct <t> child replaces whatever has been
    # collected so far. Other children are ignored. Returns an empty Array
    # when the file does not exist.
    def extract_shared_strings
      return [] unless doc_exists?

      doc.xpath("/sst/si").map do |si|
        si.children.inject('') do |collected, child|
          case child.name
          when 'r'
            child.children.each do |run_part|
              collected << run_part.content if run_part.name == 't'
            end
            collected
          when 't'
            child.content
          else
            collected
          end
        end
      end
    end
  end
end
@@ -0,0 +1,202 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  # Reader for a single worksheet XML part: cells, hyperlinks, dimensions
  # and streaming row access.
  class Excelx::SheetDoc < Excelx::Extractor
    # path           - location of the worksheet XML file
    # relationships  - relationship lookup used by #each_cell for hyperlinks
    # styles         - object answering #style_format(style_index)
    # shared_strings - object indexed by Integer to resolve shared strings
    # workbook       - object answering #base_date
    # options        - Hash; only :expand_merged_ranges is read here
    def initialize(path, relationships, styles, shared_strings, workbook, options = {})
      super(path)
      @relationships = relationships
      @styles = styles
      @shared_strings = shared_strings
      @workbook = workbook
      @options = options
    end

    # Memoized Hash of cell key => Excelx::Cell for the whole sheet.
    # The argument is only used by the first call.
    def cells(relationships)
      @cells ||= extract_cells(relationships)
    end

    # Memoized Hash of cell key => hyperlink target.
    # The argument is only used by the first call.
    def hyperlinks(relationships)
      @hyperlinks ||= extract_hyperlinks(relationships)
    end

    # The sheet's declared dimension reference. This is only an upper bound
    # on the cells that may be parsed, since the document can be sparse.
    def dimensions
      @dimensions ||= extract_dimensions
    end

    # Hands every <row> element of the file to the given block.
    def each_row_streaming(&block)
      Roo::Utils.each_element(@path, 'row', &block)
    end

    # Yields an Excelx::Cell for every child of the given row element.
    # Returns an empty Array when no row element is given.
    def each_cell(row_xml)
      if row_xml
        row_xml.children.each do |cell_element|
          key = ::Roo::Utils.ref_to_key(cell_element['r'])
          link = hyperlinks(@relationships)[key]
          yield cell_from_xml(cell_element, link)
        end
      else
        []
      end
    end

    private

    # Converts one <c> element into an Excelx::Cell.
    #
    # Example input:  <c r="A5" s="2"><v>22606</v></c>
    #
    # The children are walked in order, so an <f> formula seen before the
    # <v> value is carried into the resulting cell. The first inline-string
    # <t> or the first <v> ends the walk. A cell with neither yields an
    # empty cell that only knows its coordinate.
    def cell_from_xml(cell_xml, hyperlink)
      # NOTE: to_i silently turns a missing style attribute into 0; this
      # only works because format 0 happens to be the general format.
      style = cell_xml['s'].to_i
      format = nil
      value_type =
        case cell_xml['t']
        when 's'         then :shared
        when 'b'         then :boolean
        when 'str'       then :string
        when 'inlineStr' then :inlinestr
        else
          format = @styles.style_format(style)
          Excelx::Format.to_type(format)
        end
      formula = nil
      row, column = ::Roo::Utils.split_coordinate(cell_xml['r'])

      cell_xml.children.each do |child|
        case child.name
        when 'is'
          child.children.each do |inline_str|
            next unless inline_str.name == 't'
            return Excelx::Cell.new(inline_str.content, :string, formula, :string, inline_str.content, style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
          end
        when 'f'
          formula = child.content
        when 'v'
          number = child.content.to_f
          # Serial values of 1.0 and above carry a date part: a fractional
          # remainder means date plus time, otherwise a plain date.
          if (value_type == :time || value_type == :datetime) && number >= 1.0
            value_type = (number - number.floor).abs > 0.000001 ? :datetime : :date
          end
          excelx_type = [:numeric_or_formula, format.to_s]
          value =
            case value_type
            when :shared
              value_type = :string
              excelx_type = :string
              @shared_strings[child.content.to_i]
            when :boolean
              child.content.to_i == 1 ? 'TRUE' : 'FALSE'
            when :date, :time, :datetime
              child.content
            when :formula
              child.content.to_f
            when :string
              excelx_type = :string
              child.content
            else
              value_type = :float
              child.content
            end
          return Excelx::Cell.new(value, value_type, formula, excelx_type, child.content, style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
        end
      end

      Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, Excelx::Cell::Coordinate.new(row, column))
    end

    # Builds cell key => target for every <hyperlink> that has an id
    # attribute resolvable through the given relationships.
    def extract_hyperlinks(relationships)
      doc.xpath("/worksheet/hyperlinks/hyperlink").each_with_object({}) do |hyperlink, links|
        next unless hyperlink.attribute('id')

        relationship = relationships[hyperlink.attribute('id').text]
        next unless relationship

        key = ::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s)
        links[key] = relationship.attribute('Target').text
      end
    end

    # Copies the top-left cell of every merged range into all other
    # positions of that range, modifying the given cells Hash in place.
    def expand_merged_ranges(cells)
      # Collect destination => source pairs from the mergeCell elements.
      merges = {}
      doc.xpath("/worksheet/mergeCells/mergeCell").each do |mergecell_xml|
        tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
        (tl[0]..br[0]).each do |row|
          (tl[1]..br[1]).each do |col|
            next if row == tl[0] && col == tl[1]
            merges[[row, col]] = tl
          end
        end
      end
      # Duplicate the source value into each destination.
      merges.each do |dst, src|
        cells[dst] = cells[src]
      end
    end

    # Builds cell key => Excelx::Cell for every <c> under sheetData, then
    # expands merged ranges when the :expand_merged_ranges option is set.
    def extract_cells(relationships)
      extracted_cells = doc.xpath("/worksheet/sheetData/row/c").each_with_object({}) do |cell_xml, found|
        key = ::Roo::Utils.ref_to_key(cell_xml['r'])
        found[key] = cell_from_xml(cell_xml, hyperlinks(relationships)[key])
      end
      expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges]
      extracted_cells
    end

    # Returns the `ref` attribute value of the first <dimension> element
    # delivered by the streaming reader.
    def extract_dimensions
      Roo::Utils.each_element(@path, 'dimension') do |dimension|
        return dimension.attributes["ref"].value
      end
    end

    # Historical note: sample layout of the file xl/comments1.xml
    #
    #   <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
    #   <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
    #     <authors>
    #       <author />
    #     </authors>
    #     <commentList>
    #       <comment ref="B4" authorId="0">
    #         <text>
    #           <r>
    #             <rPr>
    #               <sz val="10" />
    #               <rFont val="Arial" />
    #               <family val="2" />
    #             </rPr>
    #             <t>Kommentar fuer B4</t>
    #           </r>
    #         </text>
    #       </comment>
    #       <comment ref="B5" authorId="0">
    #         ... same structure, text "Kommentar fuer B5" ...
    #       </comment>
    #     </commentList>
    #   </comments>
    #
    # Disabled legacy snippet kept for reference:
    #
    #   if @comments_doc[self.sheets.index(sheet)]
    #     read_comments(sheet)
    #   end
  end
end
@@ -0,0 +1,62 @@
1
+ require 'roo/font'
2
+ require 'roo/excelx/extractor'
3
+
4
module Roo
  # Reads the styles XML part: number formats and fonts per cell style.
  class Excelx::Styles < Excelx::Extractor
    # Converts a cell's style index into a number format.
    # A custom <numFmt> registered under the style's numFmtId wins;
    # otherwise the id is looked up in Excelx::Format::STANDARD_FORMATS.
    def style_format(style)
      id = num_fmt_ids[style.to_i]
      num_fmts[id] || Excelx::Format::STANDARD_FORMATS[id.to_i]
    end

    # Memoized Array with one Font per <xf> entry of <cellXfs>, so that a
    # cell's style index selects its font.
    def definitions
      @definitions ||= extract_definitions
    end

    private

    def num_fmt_ids
      @num_fmt_ids ||= extract_num_fmt_ids
    end

    def num_fmts
      @num_fmts ||= extract_num_fmts
    end

    def fonts
      @fonts ||= extract_fonts
    end

    # The <xf> elements below <cellXfs>, in document order.
    # Only element nodes are selected: iterating over all children would
    # also visit whitespace text nodes in a pretty-printed file and shift
    # every style index that follows them.
    def cell_xfs
      doc.xpath("//cellXfs/xf")
    end

    # One Font per cell style, chosen through the xf's fontId attribute.
    def extract_definitions
      cell_xfs.map { |xf| fonts[xf['fontId'].to_i] }
    end

    # One Font per <font> element; a flag is set when the matching
    # <b>, <i> or <u> child is present.
    def extract_fonts
      doc.xpath("//fonts/font").map do |font_el|
        Font.new.tap do |font|
          font.bold = !font_el.xpath('./b').empty?
          font.italic = !font_el.xpath('./i').empty?
          font.underline = !font_el.xpath('./u').empty?
        end
      end
    end

    # The numFmtId attribute (String or nil) of each cell style, by index.
    def extract_num_fmt_ids
      cell_xfs.map { |xf| xf['numFmtId'] }
    end

    # Hash of numFmtId => formatCode for every <numFmt> element.
    def extract_num_fmts
      Hash[doc.xpath("//numFmt").map do |num_fmt|
        [num_fmt['numFmtId'], num_fmt['formatCode']]
      end]
    end
  end
end
@@ -0,0 +1,59 @@
1
+ require 'roo/excelx/extractor'
2
+
3
module Roo
  # Reads the workbook XML part: sheet list, defined names and date system.
  class Excelx::Workbook < Excelx::Extractor
    # A defined name ("label") pointing at a single cell of a sheet.
    class Label
      attr_reader :sheet, :row, :col, :name

      # name  - the defined name
      # sheet - sheet part of the reference
      # row   - row part of the reference, converted with to_i
      # col   - column letters, converted with Roo::Utils.letter_to_number
      def initialize(name, sheet, row, col)
        @name = name
        @sheet = sheet
        @row = row.to_i
        @col = ::Roo::Utils.letter_to_number(col)
      end

      # The [row, col] pair of the referenced cell.
      def key
        [@row, @col]
      end
    end

    # Raises ArgumentError when the workbook file does not exist.
    def initialize(path)
      super
      unless doc_exists?
        raise ArgumentError, 'missing required workbook file'
      end
    end

    # All <sheet> nodes of the workbook.
    def sheets
      doc.xpath("//sheet")
    end

    # aka labels: Hash of name => Label for every <definedName> whose text
    # looks like "Sheet1!$C$5".
    def defined_names
      Hash[doc.xpath("//definedName").map do |defined_name|
        # "Sheet1!$C$5"
        sheet, coordinates = defined_name.text.split('!$', 2)
        # Names without a "!$" reference (formulas, constants, "#REF!")
        # have no coordinates to split; skip them rather than fail on nil.
        next unless coordinates
        col, row = coordinates.split('$')
        name = defined_name['name']
        [name, Label.new(name, sheet, row, col)]
      end.compact]
    end

    # Memoized base date for serial date values.
    # Defaults to the 1900 system (1899-12-30, one day early because of an
    # Excel quirk); uses 1904-01-01 when workbookPr's date1904 is set.
    # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
    def base_date
      @base_date ||=
        begin
          uses_1904 = doc.css("workbookPr[date1904]").any? do |workbook_pr|
            workbook_pr["date1904"] =~ /true|1/i
          end
          uses_1904 ? Date.new(1904, 1, 1) : Date.new(1899, 12, 30)
        end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Roo
  # Plain holder for the bold/italic/underline flags of a font.
  class Font
    attr_accessor :bold, :italic, :underline

    # Predicate spellings of the readers; each returns the stored flag
    # unchanged (nil until a value has been assigned).
    alias_method :bold?, :bold
    alias_method :italic?, :italic
    alias_method :underline?, :underline
  end
end